Commit 76f4b1fc
Authored on Jun 23, 2019 by Chunwei

Init OpenCL support for lite framework.

Parents: c1cc27d0, b3058944

Showing 29 changed files with 3566 additions and 0 deletions (+3566, -0).
CMakeLists.txt                                                +7    -0
cmake/configure.cmake                                         +4    -0
cmake/external/opencl-clhpp.cmake                             +36   -0
cmake/external/opencl-headers.cmake                           +33   -0
paddle/fluid/lite/CMakeLists.txt                              +1    -0
paddle/fluid/lite/opencl/CMakeLists.txt                       +19   -0
paddle/fluid/lite/opencl/cl2_header.h                         +21   -0
paddle/fluid/lite/opencl/cl_caller.cc                         +88   -0
paddle/fluid/lite/opencl/cl_caller.h                          +30   -0
paddle/fluid/lite/opencl/cl_context.cc                        +73   -0
paddle/fluid/lite/opencl/cl_context.h                         +43   -0
paddle/fluid/lite/opencl/cl_engine.cc                         +171  -0
paddle/fluid/lite/opencl/cl_engine.h                          +96   -0
paddle/fluid/lite/opencl/cl_half.cc                           +518  -0
paddle/fluid/lite/opencl/cl_half.h                            +32   -0
paddle/fluid/lite/opencl/cl_helper.cc                         +90   -0
paddle/fluid/lite/opencl/cl_helper.h                          +52   -0
paddle/fluid/lite/opencl/cl_image.cc                          +164  -0
paddle/fluid/lite/opencl/cl_image.h                           +118  -0
paddle/fluid/lite/opencl/cl_image_converter.cc                +450  -0
paddle/fluid/lite/opencl/cl_image_converter.h                 +115  -0
paddle/fluid/lite/opencl/cl_kernel/cl_common.h                +34   -0
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl  +27   -0
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl             +91   -0
paddle/fluid/lite/opencl/cl_test.cc                           +154  -0
paddle/fluid/lite/opencl/cl_tool.cc                           +84   -0
paddle/fluid/lite/opencl/cl_tool.h                            +32   -0
paddle/fluid/lite/opencl/cl_wrapper.cc                        +962  -0
paddle/fluid/lite/tools/build.sh                              +21   -0

CMakeLists.txt

@@ -150,6 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
 option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
 option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
 option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
+option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
 option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
 option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)

@@ -181,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
 include(external/eigen)    # download eigen3
 include(ccache)            # set ccache for compilation
+# for opencl
+if (LITE_WITH_OPENCL)
+    include(external/opencl-headers)
+    include(external/opencl-clhpp)
+endif()
 include(generic)           # simplify cmake module
 include(configure)         # add paddle env configuration

cmake/configure.cmake

@@ -176,6 +176,10 @@ if (LITE_WITH_ARM)
     add_definitions("-DLITE_WITH_ARM")
 endif()

+if (LITE_WITH_OPENCL)
+    add_definitions("-DLITE_WITH_OPENCL")
+endif()
+
 if (LITE_WITH_PROFILE)
     add_definitions("-DLITE_WITH_PROFILE")
 endif()

cmake/external/opencl-clhpp.cmake  (new file, mode 100644)

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

INCLUDE(ExternalProject)

SET(OPENCL_CLHPP_SRCS_DIR    ${THIRD_PARTY_PATH}/opencl-clhpp)
SET(OPENCL_CLHPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/opencl-clhpp)
SET(OPENCL_CLHPP_INCLUDE_DIR "${OPENCL_CLHPP_INSTALL_DIR}" CACHE PATH
    "opencl-clhpp include directory." FORCE)

INCLUDE_DIRECTORIES(${OPENCL_CLHPP_INCLUDE_DIR})

ExternalProject_Add(
  opencl_clhpp
  GIT_REPOSITORY   "https://github.com/KhronosGroup/OpenCL-CLHPP.git"
  GIT_TAG          "v2.0.10"
  PREFIX           "${OPENCL_CLHPP_SRCS_DIR}"
  CMAKE_ARGS       -DBUILD_DOCS=OFF
                   -DBUILD_EXAMPLES=OFF
                   -DBUILD_TESTS=OFF
                   -DCMAKE_INSTALL_PREFIX=${OPENCL_CLHPP_INSTALL_DIR}
  CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${OPENCL_CLHPP_INSTALL_DIR}
                   -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
)

ADD_DEPENDENCIES(opencl_clhpp opencl_headers)

cmake/external/opencl-headers.cmake  (new file, mode 100644)

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

INCLUDE(ExternalProject)

SET(OPENCL_HEADERS_SRCS_DIR    ${THIRD_PARTY_PATH}/opencl-headers)
SET(OPENCL_HEADERS_INCLUDE_DIR "${OPENCL_HEADERS_SRCS_DIR}/src/opencl_headers"
    CACHE PATH "opencl-headers include directory." FORCE)

INCLUDE_DIRECTORIES(${OPENCL_HEADERS_INCLUDE_DIR})

ExternalProject_Add(
  opencl_headers
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/KhronosGroup/OpenCL-Headers.git"
  GIT_TAG           "c5a4bbeabb10d8ed3d1c651b93aa31737bc473dd"
  PREFIX            ${OPENCL_HEADERS_SRCS_DIR}
  DOWNLOAD_NAME     "OpenCL-Headers"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)

paddle/fluid/lite/CMakeLists.txt

@@ -182,6 +182,7 @@ add_subdirectory(x86)
 add_subdirectory(arm)
 add_subdirectory(host)
 add_subdirectory(cuda)
+add_subdirectory(opencl)
 add_subdirectory(model_parser)
 add_subdirectory(utils)
 add_subdirectory(api)

paddle/fluid/lite/opencl/CMakeLists.txt  (new file, mode 100644)

if (NOT LITE_WITH_OPENCL)
    return()
endif()

if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
    cc_library(cl_wrapper SRCS cl_wrapper.cc)
    cc_library(cl_tool SRCS cl_tool.cc)
    target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
    cc_library(cl_half SRCS cl_half.cc)
    target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
    cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
    cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
    cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
    cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
    cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
    cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
    lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper)
    add_dependencies(cl_tool opencl_clhpp)
endif()

paddle/fluid/lite/opencl/cl2_header.h  (new file, mode 100644)

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_MINIMUM_OPENCL_VERSION 110

#include <CL/cl2.hpp>

paddle/fluid/lite/opencl/cl_caller.cc  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"

namespace paddle {
namespace lite {

static void CopyImageData(const CLImage& cl_image, float* out) {
  int width = cl_image.image_dims()[0];
  int height = cl_image.image_dims()[1];

  half_t* image_data = new half_t[height * width * 4];
  cl::Image* image = cl_image.cl_image();
  const std::array<size_t, 3> origin{0, 0, 0};
  const std::array<size_t, 3> region{static_cast<size_t>(width),
                                     static_cast<size_t>(height), 1};
  cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
      *image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
  CL_CHECK_ERRORS(err);

  auto* converter = cl_image.image_converter();
  converter->ImageToNCHW(image_data, out, cl_image.image_dims(),
                         cl_image.tensor_dims());

  delete[] image_data;
}

bool InitOpenCLEngine(std::string cl_path) {
  auto* engine = CLEngine::Global();
  engine->set_cl_path(cl_path);
  return engine->IsInitSuccess();
}

void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
                     float* bias, const DDim& bias_dim, float* out,
                     const DDim& out_dim) {
  CLHelper helper(context);
  helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  auto kernel = helper.KernelAt(0);

  CLImage in_image;
  in_image.set_tensor_data(in, in_dim);
  in_image.InitNormalCLImage(helper.OpenCLContext());
  VLOG(3) << " --- Input image: " << in_image << " --- ";

  CLImage bias_image;
  bias_image.set_tensor_data(bias, bias_dim);
  bias_image.InitNormalCLImage(helper.OpenCLContext());
  VLOG(3) << " --- Bias image: " << bias_image << " --- ";

  CLImage out_image;
  out_image.InitEmptyImage(helper.OpenCLContext(), out_dim);

  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);

  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  auto global_work_size = cl::NDRange{width, height};
  status = helper.OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr,
      nullptr);
  CL_CHECK_ERRORS(status);

  VLOG(3) << " --- Out image: " << out_image << " --- ";

  CopyImageData(out_image, out);
}

}  // namespace lite
}  // namespace paddle

paddle/fluid/lite/opencl/cl_caller.h  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"

namespace paddle {
namespace lite {

bool InitOpenCLEngine(std::string cl_path);

void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
                     float* bias, const DDim& bias_dim, float* out,
                     const DDim& out_dim);

}  // namespace lite
}  // namespace paddle
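
For orientation, here is a minimal host-side sketch of how these two entry points could be driven. The kernel-directory path and the tensor shape below are invented for illustration; the commit's own exercise of this API lives in cl_test.cc, which is not expanded on this page.

// Hedged usage sketch only -- assumes the headers added in this commit.
#include <vector>

#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"

int main() {
  // Directory that contains the cl_kernel/ folder; this path is hypothetical.
  if (!paddle::lite::InitOpenCLEngine("/data/local/tmp/opencl")) return 1;

  paddle::lite::CLContext context;
  paddle::lite::DDim dim(
      std::vector<paddle::lite::DDim::value_type>({1, 4, 8, 8}));

  std::vector<float> in(1 * 4 * 8 * 8, 1.f);
  std::vector<float> bias(in.size(), 2.f);
  std::vector<float> out(in.size(), 0.f);

  // Uploads both operands as RGBA half images, runs elementwise_add_kernel.cl,
  // then reads the result back into `out` in NCHW order.
  paddle::lite::elementwise_add(&context, in.data(), dim, bias.data(), dim,
                                out.data(), dim);
  return 0;
}
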

paddle/fluid/lite/opencl/cl_context.cc  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <memory>
#include <string>
#include <utility>
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"

namespace paddle {
namespace lite {

cl::CommandQueue& CLContext::GetCommandQueue() {
  return CLEngine::Global()->command_queue();
}

cl::Context& CLContext::GetContext() { return CLEngine::Global()->context(); }

cl::Program& CLContext::GetProgram(const std::string& file_name,
                                   const std::string& options) {
  std::string program_key = file_name;
  if (!options.empty()) {
    program_key += options;
  }
  auto it = programs_.find(program_key);
  if (it != programs_.end()) {
    VLOG(3) << " --- program -> " << program_key << " has been built --- ";
    return *(it->second);
  }

  auto program = CLEngine::Global()->CreateProgram(
      GetContext(), CLEngine::Global()->cl_path() + "/cl_kernel/" + file_name);
  VLOG(3) << " --- begin build program -> " << program_key << " --- ";
  CLEngine::Global()->BuildProgram(program.get(), options);
  VLOG(3) << " --- end build program -> " << program_key << " --- ";

  programs_[program_key] = std::move(program);

  return *(programs_[program_key]);
}

std::unique_ptr<cl::Kernel> CLContext::GetKernel(const std::string& kernel_name,
                                                 const std::string& file_name,
                                                 const std::string& options) {
  cl_int status{CL_SUCCESS};
  VLOG(3) << " --- to get program " << file_name << " --- ";
  auto program = GetProgram(file_name, options);
  VLOG(3) << " --- end get program --- ";
  VLOG(3) << " --- to create kernel: " << kernel_name << " --- ";
  std::unique_ptr<cl::Kernel> kernel(
      new cl::Kernel(program, kernel_name.c_str(), &status));
  CL_CHECK_ERRORS(status);
  VLOG(3) << " --- end create kernel --- ";
  return std::move(kernel);
}

}  // namespace lite
}  // namespace paddle

paddle/fluid/lite/opencl/cl_context.h  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/lite/opencl/cl2_header.h"

namespace paddle {
namespace lite {

class CLContext {
 public:
  cl::CommandQueue& GetCommandQueue();

  cl::Context& GetContext();

  cl::Program& GetProgram(const std::string& file_name,
                          const std::string& options);

  std::unique_ptr<cl::Kernel> GetKernel(const std::string& kernel_name,
                                        const std::string& file_name,
                                        const std::string& options);

 private:
  std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
};

}  // namespace lite
}  // namespace paddle

paddle/fluid/lite/opencl/cl_engine.cc  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/lite/opencl/cl_engine.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>

namespace paddle {
namespace lite {

CLEngine* CLEngine::Global() {
  static CLEngine cl_engine_;
  cl_engine_.Init();
  return &cl_engine_;
}

CLEngine::~CLEngine() {
  if (command_queue_ != nullptr) {
    command_queue_->finish();
  }
  // For controlling the destruction order:
  command_queue_.reset();
  context_.reset();
  device_.reset();
  platform_.reset();
}

bool CLEngine::Init() {
  if (initialized_) {
    return true;
  }
  bool is_platform_init = InitializePlatform();
  bool is_device_init = InitializeDevice();
  is_init_success_ = is_platform_init && is_device_init;
  initialized_ = true;
  return initialized_;
}

cl::Platform& CLEngine::platform() {
  CHECK(platform_ != nullptr) << "platform_ is not initialized!";
  return *platform_;
}

cl::Context& CLEngine::context() {
  if (context_ == nullptr) {
    context_ = CreateContext();
  }
  return *context_;
}

cl::Device& CLEngine::device() {
  CHECK(device_ != nullptr) << "device_ is not initialized!";
  return *device_;
}

cl::CommandQueue& CLEngine::command_queue() {
  if (command_queue_ == nullptr) {
    command_queue_ = CreateCommandQueue(context());
  }
  return *command_queue_;
}

std::unique_ptr<cl::Program> CLEngine::CreateProgram(const cl::Context& context,
                                                     std::string file_name) {
  std::ifstream file{file_name, std::ios::binary | std::ios::ate};
  CHECK(file.is_open()) << "Can't open file from " << file_name;
  auto size = file.tellg();
  CHECK(size > 0) << "size is too small.";
  std::string content(size, '\0');
  file.seekg(0);
  file.read(&content[0], size);
  cl::Program::Sources sources;
  sources.push_back(content);
  auto prog =
      std::unique_ptr<cl::Program>(new cl::Program(context, sources, &status_));
  LOG(INFO) << "OpenCL kernel file name: " << file_name;
  LOG(INFO) << "Program source size: " << content.size();
  CL_CHECK_ERRORS(status_);
  return std::move(prog);
}

std::unique_ptr<cl::UserEvent> CLEngine::CreateEvent(
    const cl::Context& context) {
  auto event =
      std::unique_ptr<cl::UserEvent>(new cl::UserEvent(context, &status_));
  CL_CHECK_ERRORS(status_);
  return std::move(event);
}

bool CLEngine::BuildProgram(cl::Program* program, const std::string& options) {
  std::string build_option = options + " -cl-fast-relaxed-math -I " +
                             CLEngine::Global()->cl_path() + "/cl_kernel";
  status_ = program->build({*device_}, build_option.c_str());
  CL_CHECK_ERRORS(status_);

  if (status_ != CL_SUCCESS) {
    if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) ==
        CL_BUILD_ERROR) {
      std::string log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(device());
      LOG(INFO) << "Program build error: " << log;
    }
    return false;
  }

  return true;
}

bool CLEngine::InitializePlatform() {
  std::vector<cl::Platform> all_platforms;
  status_ = cl::Platform::get(&all_platforms);
  CL_CHECK_ERRORS(status_);
  if (all_platforms.empty()) {
    LOG(ERROR) << "No OpenCL platform found!";
    return false;
  }
  platform_ = std::make_shared<cl::Platform>();
  *platform_ = all_platforms[0];
  return true;
}

bool CLEngine::InitializeDevice() {
  std::vector<cl::Device> all_devices;
  status_ = platform_->getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
  CL_CHECK_ERRORS(status_);
  if (all_devices.empty()) {
    LOG(ERROR) << "No OpenCL GPU device found!";
    return false;
  }
  device_ = std::make_shared<cl::Device>();
  *device_ = all_devices[0];

  auto device_name = device_->getInfo<CL_DEVICE_NAME>();
  LOG(INFO) << "Using device: " << device_name;

  auto image_support = device_->getInfo<CL_DEVICE_IMAGE_SUPPORT>();
  if (image_support) {
    LOG(INFO) << "The chosen device supports image processing.";
  } else {
    LOG(ERROR) << "The chosen device doesn't support image processing!";
    return false;
  }

  auto ext_data = device_->getInfo<CL_DEVICE_EXTENSIONS>();
  LOG(INFO) << "The extensions supported by this device: " << ext_data;
  if (ext_data.find("cl_khr_fp16") != std::string::npos) {
    LOG(INFO) << "The chosen device supports the half data type.";
  } else {
    LOG(ERROR) << "The chosen device doesn't support the half data type!";
    return false;
  }

  auto max_units = device_->getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
  LOG(INFO) << "The chosen device has " << max_units << " compute units.";

  auto local_mem = device_->getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
  LOG(INFO) << "The local memory size of the chosen device is "
            << static_cast<float>(local_mem) / 1024 << " KB.";

  return true;
}

}  // namespace lite
}  // namespace paddle

paddle/fluid/lite/opencl/cl_engine.h  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"

namespace paddle {
namespace lite {

class CLEngine {
 public:
  static CLEngine* Global();

  bool Init();

  cl::Platform& platform();

  cl::Context& context();

  cl::Device& device();

  cl::CommandQueue& command_queue();

  std::unique_ptr<cl::Program> CreateProgram(const cl::Context& context,
                                             std::string file_name);

  std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context);

  bool BuildProgram(cl::Program* program, const std::string& options = "");

  bool IsInitSuccess() { return is_init_success_; }

  std::string cl_path() { return cl_path_; }

  void set_cl_path(std::string cl_path) { cl_path_ = cl_path; }

 private:
  CLEngine() = default;

  ~CLEngine();

  bool InitializePlatform();

  bool InitializeDevice();

  std::shared_ptr<cl::Context> CreateContext() {
    auto context = std::make_shared<cl::Context>(
        std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr,
        &status_);
    CL_CHECK_ERRORS(status_);
    return context;
  }

  std::shared_ptr<cl::CommandQueue> CreateCommandQueue(
      const cl::Context& context) {
    auto queue =
        std::make_shared<cl::CommandQueue>(context, device(), 0, &status_);
    CL_CHECK_ERRORS(status_);
    return queue;
  }

  std::string cl_path_;
  std::shared_ptr<cl::Platform> platform_{nullptr};
  std::shared_ptr<cl::Context> context_{nullptr};
  std::shared_ptr<cl::Device> device_{nullptr};
  std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
  cl_int status_{CL_SUCCESS};
  bool initialized_{false};
  bool is_init_success_{false};
};

}  // namespace lite
}  // namespace paddle

paddle/fluid/lite/opencl/cl_half.cc  (new file, mode 100644)

This diff is collapsed and its 518 added lines are not shown here.

paddle/fluid/lite/opencl/cl_half.h  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <cstdint>

namespace paddle {
namespace lite {

typedef uint16_t half_t;

half_t Float2Half(float f);

float Half2Float(half_t h);

void FloatArray2HalfArray(float* f_array, half_t* h_array, int count);

void HalfArray2FloatArray(half_t* h_array, float* f_array, int count);

}  // namespace lite
}  // namespace paddle
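
These conversions turn 32-bit floats into the 16-bit half_t values stored in the CL_HALF_FLOAT image channels. A small round-trip sketch follows; it is illustrative only, and the precision loss it mentions is a general property of fp16 rather than a value taken from this diff.

#include <cstdio>
#include "paddle/fluid/lite/opencl/cl_half.h"

int main() {
  float value = 3.14159f;
  paddle::lite::half_t h = paddle::lite::Float2Half(value);
  float back = paddle::lite::Half2Float(h);
  // fp16 keeps roughly 3 significant decimal digits, so `back` is close to
  // but not exactly 3.14159 (around 3.1406 with round-to-nearest).
  std::printf("%f -> 0x%04x -> %f\n", value, static_cast<unsigned>(h), back);
  return 0;
}
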

paddle/fluid/lite/opencl/cl_helper.cc  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/lite/opencl/cl_helper.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>

namespace paddle {
namespace lite {

void CLHelper::set_context(CLContext* context) { context_ = context; }

void CLHelper::AddKernel(const std::string& kernel_name,
                         const std::string& file_name,
                         const std::string& options) {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  VLOG(3) << " --- begin to add kernel ---";
  auto kernel = context_->GetKernel(kernel_name, file_name, options);
  kernels.emplace_back(std::move(kernel));
  VLOG(3) << " --- end to add kernel --- ";
}

cl::Kernel& CLHelper::KernelAt(const int index) {
  VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
  CHECK(static_cast<size_t>(index) < kernels.size())
      << "The index must be less than the size of kernels.";
  CHECK(kernels[index] != nullptr)
      << "The target kernel pointer cannot be null.";
  return *(kernels[index]);
}

cl::CommandQueue& CLHelper::OpenCLCommandQueue() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  return context_->GetCommandQueue();
}

cl::Context& CLHelper::OpenCLContext() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  return context_->GetContext();
}

cl::NDRange CLHelper::DefaultWorkSize(const CLImage& image) {
  // n c h w
  auto image_dim = image.tensor_dims();
  if (image_dim.size() == 4) {
    auto n = image_dim[0];
    auto h = image_dim[2];
    auto w = image_dim[3];
    auto image_width = image.ImageWidth();
    auto work_size_0 = image_width / w;
    auto work_size_1 = w;
    auto work_size_2 = n * h;
    return cl::NDRange{static_cast<size_t>(work_size_0),
                       static_cast<size_t>(work_size_1),
                       static_cast<size_t>(work_size_2)};
  } else if (image_dim.size() == 2) {
    return cl::NDRange{static_cast<size_t>(1),
                       static_cast<size_t>(image.ImageWidth()),
                       static_cast<size_t>(image.ImageHeight())};
  } else if (image_dim.size() == 1) {
    return cl::NDRange{static_cast<size_t>(1),
                       static_cast<size_t>(image.ImageWidth()),
                       static_cast<size_t>(1)};
  } else if (image_dim.size() == 3) {
    auto c = image_dim[0];
    auto h = image_dim[1];
    auto w = image_dim[2];
    return cl::NDRange{static_cast<size_t>((c + 3) / 4),
                       static_cast<size_t>(w), static_cast<size_t>(h)};
  } else {
    LOG(FATAL) << "Not support this dimension, need to be implemented!";
    return cl::NDRange{};
  }
}

}  // namespace lite
}  // namespace paddle

paddle/fluid/lite/opencl/cl_helper.h  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_image.h"

namespace paddle {
namespace lite {

class CLHelper {
 public:
  CLHelper() = default;

  explicit CLHelper(CLContext* context) : context_(context) {}

  void set_context(CLContext* context);

  void AddKernel(const std::string& kernel_name, const std::string& file_name,
                 const std::string& options = "");

  cl::Kernel& KernelAt(const int index);

  cl::CommandQueue& OpenCLCommandQueue();

  cl::Context& OpenCLContext();

  cl::NDRange DefaultWorkSize(const CLImage& image);

 private:
  CLContext* context_{nullptr};
  std::vector<std::unique_ptr<cl::Kernel>> kernels;
};

}  // namespace lite
}  // namespace paddle
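
A sketch of the call pattern this helper is meant for, mirroring what cl_caller.cc does above. The kernel and file names are the ones added by this commit; the surrounding function, its signature, and the choice of DefaultWorkSize instead of the raw image width/height are illustrative assumptions.

#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"

// Illustrative only: enqueue the elementwise_add kernel over prepared images.
void RunElementwiseAdd(paddle::lite::CLContext* context,
                       paddle::lite::CLImage* in, paddle::lite::CLImage* bias,
                       paddle::lite::CLImage* out) {
  paddle::lite::CLHelper helper(context);
  helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");

  auto kernel = helper.KernelAt(0);
  kernel.setArg(0, *in->cl_image());
  kernel.setArg(1, *bias->cl_image());
  kernel.setArg(2, *out->cl_image());

  // One work-item per output texel; DefaultWorkSize derives the range from
  // the tensor dims, whereas cl_caller.cc uses ImageWidth()/ImageHeight().
  auto global = helper.DefaultWorkSize(*out);
  helper.OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global, cl::NullRange, nullptr, nullptr);
}
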

paddle/fluid/lite/opencl/cl_image.cc  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/lite/opencl/cl_image.h"
#include <glog/logging.h>
#include <array>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"

namespace paddle {
namespace lite {

std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
  int width = cl_image.image_dims_[0];
  int height = cl_image.image_dims_[1];

  half_t* image_data = new half_t[height * width * 4];
  cl::Image* image = cl_image.cl_image();
  const std::array<size_t, 3> origin{0, 0, 0};
  const std::array<size_t, 3> region{static_cast<size_t>(width),
                                     static_cast<size_t>(height), 1};
  cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
      *image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
  CL_CHECK_ERRORS(err);

  float* tensor_data = new float[cl_image.numel()];
  auto* converter = cl_image.image_converter();
  converter->ImageToNCHW(image_data, tensor_data, cl_image.image_dims_,
                         cl_image.tensor_dims_);
  int stride = cl_image.numel() / 20;
  stride = stride > 0 ? stride : 1;

  os << " dims: " << cl_image.tensor_dims_ << "\n";
  for (int i = 0; i < cl_image.numel(); i += stride) {
    os << tensor_data[i] << " ";
  }

  delete[] tensor_data;
  delete[] image_data;

  return os;
}

void CLImage::set_tensor_data(float* tensor_data, const DDim& dim) {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
  auto numel = dim.product();
#else
  auto numel = dim.production();
#endif
  tensor_data_.reset(new float[numel]);
  memcpy(tensor_data_.get(), tensor_data, numel * sizeof(float));
  tensor_dims_ = dim;
}

void CLImage::InitCLImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterFolder);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitNormalCLImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterNormal);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitNImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterNWBlock);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitDWImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterDWBlock);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitEmptyImage(const cl::Context& context, const DDim& dim) {
  CHECK(tensor_data_ == nullptr)
      << " Empty image tensor data shouldn't have value";
  tensor_dims_ = dim;
  image_converter_.reset(new CLImageConverterNormal);

  VLOG(3) << " to get image dims ";
  image_dims_ = image_converter_->InitImageDimInfoWith(tensor_dims_);
  VLOG(3) << " end get image dims " << image_dims_;

  InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);

  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image ";
}

void CLImage::InitEmptyWithImageDim(const cl::Context& context,
                                    const DDim& image_dims) {
  VLOG(3) << " to get image dims ";
  image_dims_ = image_dims;
  VLOG(3) << " end get image dims " << image_dims_;

  InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);

  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image";
}

void CLImage::InitCLImage(const cl::Context& context,
                          CLImageConverterBase* converter) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  VLOG(3) << " begin init cl image ";
  image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);

#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
  half_t* image_data = new half_t[image_dims_.product() * 4];
#else
  half_t* image_data = new half_t[image_dims_.production() * 4];
#endif

  VLOG(3) << " convert to image ";
  converter->NCHWToImage(tensor_data_.get(), image_data, tensor_dims_);
  VLOG(3) << " end convert to image ";

  InitCLImage(context, image_dims_[0], image_dims_[1], image_data);

  delete[] image_data;
  tensor_data_ = nullptr;
  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image ";
}

void CLImage::InitCLImage(const cl::Context& context, int width, int height,
                          void* data) {
  cl::ImageFormat img_format(CL_RGBA, CL_HALF_FLOAT);
  cl_int err;
  cl_image_.reset(new cl::Image2D(
      context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
      img_format, width, height, 0, data, &err));
  CL_CHECK_ERRORS(err);
  CHECK(err == CL_SUCCESS) << " Create image 2d error.";
}

}  // namespace lite
}  // namespace paddle

paddle/fluid/lite/opencl/cl_image.h  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <iostream>
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_image_converter.h"

namespace paddle {
namespace lite {

class CLImage {
  // For debug
  friend std::ostream& operator<<(std::ostream& os, const CLImage& image);

 public:
  CLImage() = default;

  /*
   * Will not hold input tensor data, memcpy in this method.
   * */
  void set_tensor_data(float* tensor_data, const DDim& dim);

  bool IsInit() { return initialized_; }

  /*
   * Need call set_tensor_data first.
   * Folder when one dim or two dim.
   * */
  void InitCLImage(const cl::Context& context);

  void InitNormalCLImage(const cl::Context& context);

  void InitNImage(const cl::Context& context);

  void InitDWImage(const cl::Context& context);

  void InitEmptyImage(const cl::Context& context, const DDim& dim);

  void InitEmptyWithImageDim(const cl::Context& context,
                             const DDim& image_dims);

  cl::Image* cl_image() const { return cl_image_.get(); }

  const DDim& image_dims() const { return image_dims_; }

  inline size_t ImageWidth() const { return image_dims_[0]; }

  inline size_t ImageHeight() const { return image_dims_[1]; }

  const DDim& tensor_dims() const { return tensor_dims_; }

  /*
   * Resize original tensor dim.
   * */
  inline CLImage& Resize(const DDim& dims) {
    tensor_dims_ = dims;
    return *this;
  }

  template <typename T>
  T* data() const {
    CHECK(!initialized_) << "CL image has initialized, tensor data has been "
                            "deleted, can't use tensor data!";
    return reinterpret_cast<T*>(tensor_data_);
  }

  /*
   * Numel of tensor dim
   * */
  inline int64_t numel() const {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
    return tensor_dims_.product();
#else
    return tensor_dims_.production();
#endif
  }

  /*
   * Original tensor dim
   * */
  cl::UserEvent& cl_event() const { return *cl_event_; }

  CLImageConverterBase* image_converter() const {
    return image_converter_.get();
  }

 private:
  void InitCLImage(const cl::Context& context, CLImageConverterBase* converter);

  void InitCLImage(const cl::Context& context, int width, int height,
                   void* data);

  bool initialized_ = false;
  std::unique_ptr<cl::Image2D> cl_image_{nullptr};
  std::unique_ptr<cl::UserEvent> cl_event_{nullptr};
  DDim tensor_dims_;
  DDim image_dims_;
  std::unique_ptr<float> tensor_data_{nullptr};
  std::unique_ptr<CLImageConverterBase> image_converter_{nullptr};
};

}  // namespace lite
}  // namespace paddle
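
A brief sketch of the lifecycle this class implies: set_tensor_data() deep-copies a host NCHW buffer, and the Init*CLImage() overloads convert it into the RGBA/CL_HALF_FLOAT cl::Image2D consumed by the kernels. The shape and fill value below are invented for illustration.

#include <vector>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_image.h"

// Illustrative upload of a 1x3x32x32 NCHW tensor into an OpenCL image.
void UploadExample() {
  using paddle::lite::CLEngine;
  using paddle::lite::CLImage;
  using paddle::lite::DDim;

  DDim dim(std::vector<DDim::value_type>({1, 3, 32, 32}));
  std::vector<float> host(1 * 3 * 32 * 32, 0.5f);

  CLImage image;
  image.set_tensor_data(host.data(), dim);  // copies the buffer
  // Uses CLImageConverterNormal: image width = W * ceil(C/4), height = N * H.
  image.InitNormalCLImage(CLEngine::Global()->context());

  // image.cl_image() is now ready to be bound with cl::Kernel::setArg().
}
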

paddle/fluid/lite/opencl/cl_image_converter.cc  (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/lite/opencl/cl_image_converter.h"
#include <glog/logging.h>
#include <vector>

namespace paddle {
namespace lite {

DDim CLImageConverterDefault::InitImageDimInfoWith(const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];
  size_t width = W * ((C + 3) / 4);
  size_t height = H * N;
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterDefault::NCHWToImage(float* nchw, half_t* image,
                                          const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }

  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];

  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);

  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;

  size_t width = in_image_dim[0];
  size_t w_block = width / W;

  float* p = nchw;
  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;
            p++;
          } else {
            image[i2] = 0.0;
            i2 += 4;
          }
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

void CLImageConverterDefault::ImageToNCHW(half_t* image, float* tensor,
                                          const DDim& image_dim,
                                          const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }

  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];

  size_t width = image_dim[0];

  float* p = tensor;
  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;
          p++;
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

DDim CLImageConverterFolder::InitImageDimInfoWith(const DDim& tensor_dim) {
  if (tensor_dim.size() <= 2) {
    size_t tdim[2] = {1, 1};
    if (tensor_dim.size() == 1) {
      tdim[1] = tensor_dim[0];
    } else {
      tdim[0] = tensor_dim[0];
      tdim[1] = tensor_dim[1];
    }
    size_t width = (tdim[1] + 3) / 4;
    size_t height = tdim[0];

    width_of_one_block_ = width;
    height_of_one_block_ = height;
    c_block_ = 1;

    return DDim(std::vector<DDim::value_type>(
        {static_cast<DDim::value_type>(width),
         static_cast<DDim::value_type>(height)}));
  } else {
    size_t new_dims[] = {1, 1, 1, 1};
    for (size_t j = 0; j < tensor_dim.size(); ++j) {
      new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
    }

    size_t N, C, H, W;
    N = new_dims[0];
    C = new_dims[1];
    H = new_dims[2];
    W = new_dims[3];

    size_t width = W * ((C + 3) / 4);
    size_t height = H * N;

    width_of_one_block_ = W;
    height_of_one_block_ = H;
    c_block_ = width / W;

    return DDim(std::vector<DDim::value_type>(
        {static_cast<DDim::value_type>(width),
         static_cast<DDim::value_type>(height)}));
  }
}

void CLImageConverterFolder::NCHWToImage(float* tensor, half_t* image,
                                         const DDim& tensor_dim) {
  CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
      << " Tensor dim is not support!";

  if (tensor_dim.size() > 2) {
    CLImageConverterDefault default_converter;
    default_converter.NCHWToImage(tensor, image, tensor_dim);
  } else {
    size_t tdim[2] = {1, 1};
    if (tensor_dim.size() == 1) {
      tdim[1] = tensor_dim[0];
    } else {
      tdim[0] = tensor_dim[0];
      tdim[1] = tensor_dim[1];
    }

    DDim image_dim = InitImageDimInfoWith(tensor_dim);
    size_t width = image_dim[0];

    for (size_t h = 0; h < tdim[0]; h++) {
      for (size_t w = 0; w < tdim[1]; w++) {
        image[(h * width + w / 4) * 4 + (w % 4)] =
            Float2Half(tensor[h * tdim[1] + w]);
      }
    }
  }
}

void CLImageConverterFolder::ImageToNCHW(half_t* image, float* tensor,
                                         const DDim& image_dim,
                                         const DDim& tensor_dim) {
  if (tensor_dim.size() > 2) {
    CLImageConverterDefault default_converter;
    default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
  } else {
    size_t width = image_dim[0];
    size_t H = 1, W = 1;

    if (tensor_dim.size() == 2) {
      H = tensor_dim[0];
      W = tensor_dim[1];
    } else if (tensor_dim.size() == 1) {
      W = tensor_dim[0];
    }

    float* p = tensor;

    for (size_t h = 0; h < H; h++) {
      for (size_t w = 0; w < W; w++) {
        p[h * W + w] = Half2Float(image[(h * width + w / 4) * 4 + (w % 4)]);
      }
    }
  }
}

DDim CLImageConverterNWBlock::InitImageDimInfoWith(const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  size_t N, C, H, W;
  N = tensor_dim[0];
  C = tensor_dim[1];
  H = tensor_dim[2];
  W = tensor_dim[3];
  size_t width = W * ((N + 3) / 4);
  size_t height = C * H;
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterNWBlock::NCHWToImage(float* tensor, half_t* image,
                                          const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  auto image_dim = InitImageDimInfoWith(tensor_dim);
  float* p = tensor;
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];
  size_t block = image_dim[0] / tensor_dim[3];

  for (size_t n = 0; n < block * 4; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          size_t index = 4 * c * (width * H) + 4 * h * width +
                         4 * W * (n / 4) + w * 4 + n % 4;
          if (n < N) {
            image[index] = Float2Half(*p);
            p++;
          } else {
            image[index] = 0.0;
          }
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}

void CLImageConverterNWBlock::ImageToNCHW(half_t* image, float* tensor,
                                          const DDim& image_dim,
                                          const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float* p = tensor;
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];

  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          size_t index = 4 * c * (width * H) + 4 * h * width +
                         4 * W * (n / 4) + w * 4 + n % 4;
          *p = Half2Float(image[index]);
          p++;
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}

DDim CLImageConverterDWBlock::InitImageDimInfoWith(const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  size_t N, C, H, W;
  N = tensor_dim[0];
  C = tensor_dim[1];
  H = tensor_dim[2];
  W = tensor_dim[3];
  size_t width = W * ((N + 3) / 4);
  size_t height = C * H;
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterDWBlock::NCHWToImage(float* tensor, half_t* image,
                                          const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }

  size_t N, C, H, W;
  N = new_dims[1];
  C = new_dims[0];
  H = new_dims[2];
  W = new_dims[3];

  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);

  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;

  size_t width = in_image_dim[0];
  size_t w_block = width / W;

  float* p = tensor;
  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;
            p++;
          } else {
            image[i2] = 0.0;
            i2 += 4;
          }
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

void CLImageConverterDWBlock::ImageToNCHW(half_t* image, float* tensor,
                                          const DDim& image_dim,
                                          const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float* p = tensor;
  size_t N = tensor_dim[1];
  size_t C = tensor_dim[0];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];

  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;
          p++;
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

DDim CLImageConverterNormal::InitImageDimInfoWith(const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }

  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];

  size_t width = W * ((C + 3) / 4);
  size_t height = H * N;

  width_of_one_block_ = W;
  height_of_one_block_ = H;
  c_block_ = width / W;

  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterNormal::NCHWToImage(float* tensor, half_t* image,
                                         const DDim& tensor_dim) {
  CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
      << " Tensor dim is not support!";

  CLImageConverterDefault default_converter;
  default_converter.NCHWToImage(tensor, image, tensor_dim);
}

void CLImageConverterNormal::ImageToNCHW(half_t* image, float* tensor,
                                         const DDim& image_dim,
                                         const DDim& tensor_dim) {
  CLImageConverterDefault default_converter;
  default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
}

DDim CLImageConverterWinoTransWeight::InitImageDimInfoWith(
    const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  size_t N, C;
  N = tensor_dim[0];
  C = tensor_dim[1];
  size_t width = (C + 3) / 4;
  size_t height = N * 16;  // N * (wino_blk_size + 2) * (wino_blk_size + 2)
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterWinoTransWeight::NCHWToImage(float* tensor, half_t* image,
                                                  const DDim& tensor_dim) {}

void CLImageConverterWinoTransWeight::ImageToNCHW(half_t* image, float* tensor,
                                                  const DDim& image_dim,
                                                  const DDim& tensor_dim) {}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_image_converter.h
0 → 100644
浏览文件 @
76f4b1fc
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace paddle {
namespace lite {

class CLImageConverterBase {
 public:
  virtual ~CLImageConverterBase() {}

  virtual void NCHWToImage(float *nchw, half_t *image,
                           const DDim &tensor_dim) = 0;
  virtual void ImageToNCHW(half_t *image, float *nchw, const DDim &image_dim,
                           const DDim &tensor_dim) = 0;
  virtual DDim InitImageDimInfoWith(const DDim &tensor_dim) = 0;
};

class CLImageConverterDefault : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim &tensor_dim);
  void NCHWToImage(float *nchw, half_t *image, const DDim &tensor_dim);
  void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
                   const DDim &tensor_dim);
};

class CLImageConverterFolder : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim &tensor_dim);
  void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
  void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
                   const DDim &tensor_dim);

  /*
   * width of original tensor
   */
  inline size_t WidthOfOneBlock() const { return width_of_one_block_; }

  /*
   * height of original tensor
   */
  inline size_t HeightOfOneBlock() const { return height_of_one_block_; }

  int GetCBlock() const { return c_block_; }

 private:
  int c_block_;
  int width_of_one_block_;
  int height_of_one_block_;
};

class CLImageConverterNormal : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim &tensor_dim);
  void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
  void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
                   const DDim &tensor_dim);

  /*
   * width of original tensor
   */
  inline size_t WidthOfOneBlock() const { return width_of_one_block_; }

  /*
   * height of original tensor
   */
  inline size_t HeightOfOneBlock() const { return height_of_one_block_; }

  int GetCBlock() const { return c_block_; }

 private:
  int c_block_;
  int width_of_one_block_;
  int height_of_one_block_;
};

class CLImageConverterNWBlock : public CLImageConverterBase {
  DDim InitImageDimInfoWith(const DDim &tensor_dim);
  void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
  void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
                   const DDim &tensor_dim);
};

class CLImageConverterDWBlock : public CLImageConverterBase {
  DDim InitImageDimInfoWith(const DDim &tensor_dim);
  void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
  void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
                   const DDim &tensor_dim);
};

class CLImageConverterWinoTransWeight : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim &tensor_dim);
  void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
  void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
                   const DDim &tensor_dim);
};

}  // namespace lite
}  // namespace paddle
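For orientation, a minimal host-side sketch of how the default converter above might be used. The tensor shape, the buffer-size arithmetic, and the assumption of four half values per image texel are illustrative only (the packing is suggested by the (C + 3) / 4 width computation in cl_image_converter.cc), not part of this commit:

#include <vector>
#include "paddle/fluid/lite/opencl/cl_image_converter.h"

namespace paddle {
namespace lite {

// Sketch only: convert a dummy 1x4x8x8 NCHW float tensor into the
// half-precision image layout consumed by the OpenCL kernels.
void ConvertNCHWExample() {
  const DDim tensor_dim(std::vector<DDim::value_type>({1, 4, 8, 8}));
  std::vector<float> nchw(1 * 4 * 8 * 8, 1.f);  // dummy input values
  CLImageConverterDefault converter;
  const DDim image_dim = converter.InitImageDimInfoWith(tensor_dim);
  // Assumed packing: four half values per texel, hence width * height * 4.
  std::vector<half_t> image(image_dim[0] * image_dim[1] * 4);
  converter.NCHWToImage(nchw.data(), image.data(), tensor_dim);
}

}  // namespace lite
}  // namespace paddle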
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
inline half4 activation(half4 in
#ifdef PRELU
                        ,
                        half4 prelu_alpha
#endif
                        ) {
  half4 output;
#ifdef PRELU
  output = select(prelu_alpha * in, in, in >= (half4)0.0);
#endif
#ifdef RELU
  output = fmax(in, (half4)(0.0f));
#endif
  return output;
}
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

__kernel void elementwise_add(__global image2d_t input,
                              __global image2d_t bias,
                              __write_only image2d_t outputImage) {
  int x = get_global_id(0);
  int y = get_global_id(1);
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
  int2 coords;
  coords.x = x;
  coords.y = y;
  half4 in = read_imageh(input, sampler, coords);
  half4 biase = read_imageh(bias, sampler, coords);
  half4 output = in + biase;
  write_imageh(outputImage, coords, output);
}
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

#define MIN_VALUE -FLT_MAX

__kernel void pool_max(__private const int in_height,
                       __private const int in_width,
                       __private const int out_height,
                       __private const int out_width,
                       __private const int pad_top,
                       __private const int pad_left,
                       __private const int stride_h,
                       __private const int stride_w,
                       __private const int ksize_h,
                       __private const int ksize_w,
                       __read_only image2d_t input,
                       __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  int start_h = out_h * stride_h - pad_top;
  int end_h = min(start_h + ksize_h, in_height);
  start_h = max(start_h, 0);

  int start_w = out_w * stride_w - pad_left;
  int end_w = min(start_w + ksize_w, in_width);
  start_w = max(start_w, 0);

  const int pos_in_x = out_c * in_width;
  const int pos_in_y = out_n * in_height;
  half4 max_value = (half4)(MIN_VALUE);
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      half4 tmp =
          read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
      max_value = max(max_value, tmp);
    }
  }

  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
}

__kernel void pool_avg(__private const int in_height,
                       __private const int in_width,
                       __private const int out_height,
                       __private const int out_width,
                       __private const int pad_top,
                       __private const int pad_left,
                       __private const int stride_h,
                       __private const int stride_w,
                       __private const int ksize_h,
                       __private const int ksize_w,
                       __read_only image2d_t input,
                       __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  int start_h = max(out_h * stride_h - pad_top, 0);
  int end_h = min(start_h + ksize_h, in_height);
  int start_w = max(out_w * stride_w - pad_left, 0);
  int end_w = min(start_w + ksize_w, in_width);

  const int pos_in_x = out_c * in_width;
  const int pos_in_y = out_n * in_height;
  half4 sum = (half4)(0.0f);
  int num = 0;
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
      num++;
    }
  }
  half4 avg = sum / num;
  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), avg);
}
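On the host side, the three get_global_id axes used by these pooling kernels correspond to (channel block, output width, batch * output height). A rough sketch of the matching launch geometry follows; the function name, parameter names, and reliance on the project's cl2_header.h wrapper are assumptions for illustration, not code from this commit:

#include "paddle/fluid/lite/opencl/cl2_header.h"

// Sketch only: build the 3-D global work size matching the pooling kernels'
// indexing, where get_global_id(2) folds batch index and output row together.
cl::NDRange PoolGlobalWorkSize(size_t channel_blocks,  // (C + 3) / 4
                               size_t out_width, size_t batch,
                               size_t out_height) {
  return cl::NDRange(channel_blocks, out_width, batch * out_height);
}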
paddle/fluid/lite/opencl/cl_test.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");

namespace paddle {
namespace lite {

TEST(cl_test, engine_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  engine->platform();
  engine->device();
  engine->command_queue();
  auto& context = engine->context();
  auto program = engine->CreateProgram(
      context,
      engine->cl_path() + "/cl_kernel/" + "elementwise_add_kernel.cl");
  auto event = engine->CreateEvent(context);
  CHECK(engine->BuildProgram(program.get()));
}

TEST(cl_test, context_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  CLContext context;
  context.GetKernel("pool_max", "pool_kernel.cl", "");
  context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
  context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
}
TEST(cl_test, kernel_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  std::unique_ptr<CLContext> context(new CLContext);
  // std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
  std::unique_ptr<CLHelper> helper(new CLHelper);
  helper->set_context(context.get());
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  helper->AddKernel("pool_max", "pool_kernel.cl");
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  auto kernel = helper->KernelAt(2);

  std::unique_ptr<float[]> in_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    in_data[i] = 1.f;
  }
  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage in_image;
  in_image.set_tensor_data(in_data.get(), in_dim);
  in_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << in_image;

  std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    bias_data[i] = 2.f;
  }
  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage bias_image;
  bias_image.set_tensor_data(bias_data.get(), bias_dim);
  bias_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << bias_image;

  CLImage out_image;
  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  out_image.InitEmptyImage(helper->OpenCLContext(), out_dim);
  LOG(INFO) << out_image;

  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);

  // auto global_work_size = helper->DefaultWorkSize(out_image);
  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  auto global_work_size = cl::NDRange{width, height};
  cl::Event event;
  status = helper->OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, &event);
  CL_CHECK_ERRORS(status);
  double start_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
  double stop_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
  double elapsed_micros = (stop_nanos - start_nanos) / 1000.0;
  LOG(INFO) << "Kernel Run Cost Time: " << elapsed_micros << " us.";
  LOG(INFO) << out_image;
}
TEST(cl_test, elementwise_add_test) {
  std::default_random_engine engine;
  std::uniform_real_distribution<float> dist(-5, 5);

  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> in_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    in_data[i] = dist(engine);
  }

  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    bias_data[i] = dist(engine);
  }

  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> out(new float[1024 * 512]);

  bool status = InitOpenCLEngine(FLAGS_cl_path);
  CHECK(status) << "Fail to initialize OpenCL engine.";
  CLContext context;
  elementwise_add(&context, in_data.get(), in_dim, bias_data.get(), bias_dim,
                  out.get(), out_dim);

  int stride = 1024 * 512 / 20;
  for (int i = 0; i < 1024 * 512; i += stride) {
    std::cout << out[i] << " ";
  }
  std::cout << std::endl;
}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_tool.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {

const char *opencl_error_to_str(cl_int error) {
#define CASE_CL_CONSTANT(NAME) \
  case NAME:                   \
    return #NAME;
  // Suppose that no combinations are possible.
  switch (error) {
    CASE_CL_CONSTANT(CL_SUCCESS)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
    CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
    CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
    CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
    CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
    CASE_CL_CONSTANT(CL_MAP_FAILURE)
    CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
    CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
    CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE)
    CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
    CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
    CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
    CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
    CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
    CASE_CL_CONSTANT(CL_INVALID_BINARY)
    CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL)
    CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
    CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
    CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
    CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
    CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_EVENT)
    CASE_CL_CONSTANT(CL_INVALID_OPERATION)
    CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
    default:
      return "UNKNOWN ERROR CODE";
  }
#undef CASE_CL_CONSTANT
}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_tool.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace paddle {
namespace lite {

const char *opencl_error_to_str(cl_int error);

#define CL_CHECK_ERRORS(ERR)                                          \
  if (ERR != CL_SUCCESS) {                                            \
    printf(                                                           \
        "OpenCL error with code %s happened in file %s at line %d. "  \
        "Exiting.\n",                                                 \
        opencl_error_to_str(ERR), __FILE__, __LINE__);                \
  }

}  // namespace lite
}  // namespace paddle
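A short usage sketch for the macro above; the function name and the fabricated failure code are assumptions for illustration, while the call pattern mirrors the checks in cl_test.cc. Note that despite the "Exiting." wording, the macro only prints the symbolic error name, file, and line and does not terminate, so callers still decide how to recover:

#include "paddle/fluid/lite/opencl/cl_tool.h"

namespace paddle {
namespace lite {

// Sketch only: pretend an OpenCL call failed and report it.
void ReportOpenCLErrorExample() {
  cl_int status = CL_INVALID_VALUE;  // fabricated failure code
  CL_CHECK_ERRORS(status);  // prints the symbolic name, file, and line
}

}  // namespace lite
}  // namespace paddle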
paddle/fluid/lite/opencl/cl_wrapper.cc
0 → 100644
This diff is collapsed and not shown here.
paddle/fluid/lite/tools/build.sh
...
@@ -25,6 +25,23 @@ function cmake_x86 {
    cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
}

function cmake_opencl {
    # $1: ARM_TARGET_OS in "android" , "armlinux"
    # $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf"
    cmake .. \
        -DLITE_WITH_OPENCL=ON \
        -DWITH_GPU=OFF \
        -DWITH_MKL=OFF \
        -DWITH_LITE=ON \
        -DLITE_WITH_CUDA=OFF \
        -DLITE_WITH_X86=OFF \
        -DLITE_WITH_ARM=ON \
        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
        -DWITH_TESTING=ON \
        -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
}

# This method is only called in CI.
function cmake_x86_for_CI {
    prepare_for_codegen  # fake an empty __generated_code__.cc to pass cmake.
...
@@ -422,6 +439,10 @@ function main {
            cmake_x86
            shift
            ;;
        cmake_opencl)
            cmake_opencl $ARM_OS $ARM_ABI
            shift
            ;;
        cmake_cuda)
            cmake_cuda
            shift
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录