Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a36284ca
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a36284ca
编写于
6月 21, 2019
作者:
Z
ZhenWang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cl_caller.
上级
34a290c8
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
279 addition
and
34 deletion
+279
-34
paddle/fluid/lite/opencl/CMakeLists.txt
paddle/fluid/lite/opencl/CMakeLists.txt
+4
-2
paddle/fluid/lite/opencl/cl_caller.cc
paddle/fluid/lite/opencl/cl_caller.cc
+88
-0
paddle/fluid/lite/opencl/cl_caller.h
paddle/fluid/lite/opencl/cl_caller.h
+30
-0
paddle/fluid/lite/opencl/cl_engine.cc
paddle/fluid/lite/opencl/cl_engine.cc
+2
-2
paddle/fluid/lite/opencl/cl_helper.cc
paddle/fluid/lite/opencl/cl_helper.cc
+25
-11
paddle/fluid/lite/opencl/cl_helper.h
paddle/fluid/lite/opencl/cl_helper.h
+4
-2
paddle/fluid/lite/opencl/cl_image.cc
paddle/fluid/lite/opencl/cl_image.cc
+13
-11
paddle/fluid/lite/opencl/cl_image.h
paddle/fluid/lite/opencl/cl_image.h
+4
-4
paddle/fluid/lite/opencl/cl_test.cc
paddle/fluid/lite/opencl/cl_test.cc
+109
-2
未找到文件。
paddle/fluid/lite/opencl/CMakeLists.txt
浏览文件 @
a36284ca
...
...
@@ -7,7 +7,8 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
set_target_properties
(
opencl-lib
PROPERTIES
IMPORTED_LOCATION
${
CMAKE_SOURCE_DIR
}
/opencl-lib/armeabi-v7a/libOpenCL.so
)
#${CMAKE_SOURCE_DIR}/opencl-lib/armeabi-v7a/libOpenCL.so)
${
CMAKE_SOURCE_DIR
}
/opencl-lib/armeabi-v7a/libGLES_mali.so
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
...
...
@@ -18,7 +19,8 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor
)
cc_library
(
cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine
)
lite_cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context
)
cc_library
(
cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image
)
lite_cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller
)
target_link_libraries
(
test_cl_runtime opencl-lib
)
add_dependencies
(
cl_tool opencl_clhpp
)
endif
()
paddle/fluid/lite/opencl/cl_caller.cc
0 → 100644
浏览文件 @
a36284ca
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
static
void
CopyImageData
(
const
CLImage
&
cl_image
,
float
*
out
)
{
int
width
=
cl_image
.
image_dims
()[
0
];
int
height
=
cl_image
.
image_dims
()[
1
];
half_t
*
image_data
=
new
half_t
[
height
*
width
*
4
];
cl
::
Image
*
image
=
cl_image
.
cl_image
();
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
static_cast
<
size_t
>
(
height
),
1
};
cl_int
err
=
CLEngine
::
Global
()
->
command_queue
().
enqueueReadImage
(
*
image
,
CL_TRUE
,
origin
,
region
,
0
,
0
,
image_data
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
err
);
auto
*
converter
=
cl_image
.
image_converter
();
converter
->
ImageToNCHW
(
image_data
,
out
,
cl_image
.
image_dims
(),
cl_image
.
tensor_dims
());
delete
[]
image_data
;
}
bool
InitOpenCLEngine
(
std
::
string
cl_path
)
{
auto
*
engine
=
CLEngine
::
Global
();
engine
->
set_cl_path
(
cl_path
);
return
engine
->
IsInitSuccess
();
}
void
elementwise_add
(
CLContext
*
context
,
float
*
in
,
const
DDim
&
in_dim
,
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
)
{
CLHelper
helper
(
context
);
helper
.
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
auto
kernel
=
helper
.
KernelAt
(
0
);
CLImage
in_image
;
in_image
.
set_tensor_data
(
in
,
in_dim
);
in_image
.
InitNormalCLImage
(
helper
.
OpenCLContext
());
VLOG
(
3
)
<<
" --- Inpu image: "
<<
in_image
<<
" --- "
;
CLImage
bias_image
;
bias_image
.
set_tensor_data
(
bias
,
bias_dim
);
bias_image
.
InitNormalCLImage
(
helper
.
OpenCLContext
());
VLOG
(
3
)
<<
" --- Bias image: "
<<
bias_image
<<
" --- "
;
CLImage
out_image
;
out_image
.
InitEmptyImage
(
helper
.
OpenCLContext
(),
out_dim
);
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
*
in_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
1
,
*
bias_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
size_t
width
=
in_image
.
ImageWidth
();
size_t
height
=
in_image
.
ImageHeight
();
auto
global_work_size
=
cl
::
NDRange
{
width
,
height
};
status
=
helper
.
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
status
);
VLOG
(
3
)
<<
" --- Out image: "
<<
out_image
<<
" --- "
;
CopyImageData
(
out_image
,
out
);
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_caller.h
0 → 100644
浏览文件 @
a36284ca
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
namespace
paddle
{
namespace
lite
{
bool
InitOpenCLEngine
(
std
::
string
cl_path
);
void
elementwise_add
(
CLContext
*
context
,
float
*
in
,
const
DDim
&
in_dim
,
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
);
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_engine.cc
浏览文件 @
a36284ca
...
...
@@ -133,10 +133,10 @@ bool CLEngine::InitializePlatform() {
bool
CLEngine
::
InitializeDevice
()
{
std
::
vector
<
cl
::
Device
>
all_devices
;
status_
=
platform_
->
getDevices
(
CL_DEVICE_TYPE_
DEFAULT
,
&
all_devices
);
status_
=
platform_
->
getDevices
(
CL_DEVICE_TYPE_
GPU
,
&
all_devices
);
CL_CHECK_ERRORS
(
status_
);
if
(
all_devices
.
empty
())
{
LOG
(
ERROR
)
<<
"No OpenCL device found!"
;
LOG
(
ERROR
)
<<
"No OpenCL
GPU
device found!"
;
return
false
;
}
device_
=
std
::
make_shared
<
cl
::
Device
>
();
...
...
paddle/fluid/lite/opencl/cl_helper.cc
浏览文件 @
a36284ca
...
...
@@ -21,9 +21,12 @@ limitations under the License. */
namespace
paddle
{
namespace
lite
{
void
CLHelper
::
set_context
(
CLContext
*
context
)
{
context_
=
context
;
}
void
CLHelper
::
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
const
std
::
string
&
options
)
{
CHECK
(
context_
!=
nullptr
)
<<
"Please use set_context first!"
;
VLOG
(
3
)
<<
" --- begin to add kernel ---"
;
auto
kernel
=
context_
->
GetKernel
(
kernel_name
,
file_name
,
options
);
kernels
.
emplace_back
(
std
::
move
(
kernel
));
...
...
@@ -32,16 +35,24 @@ void CLHelper::AddKernel(const std::string &kernel_name,
cl
::
Kernel
&
CLHelper
::
KernelAt
(
const
int
index
)
{
VLOG
(
3
)
<<
" --- kernel count: "
<<
kernels
.
size
()
<<
" --- "
;
CHECK
(
static_cast
<
size_t
>
(
index
)
<
kernels
.
size
())
<<
"The index must be less than the size of kernels."
;
CHECK
(
kernels
[
index
]
!=
nullptr
)
<<
"The target kernel pointer cannot be null."
;
return
*
(
kernels
[
index
]);
}
cl
::
CommandQueue
&
CLHelper
::
OpenCLCommandQueue
()
{
CHECK
(
context_
!=
nullptr
)
<<
"Please use set_context first!"
;
return
context_
->
GetCommandQueue
();
}
cl
::
Context
&
CLHelper
::
OpenCLContext
()
{
return
context_
->
GetContext
();
}
cl
::
Context
&
CLHelper
::
OpenCLContext
()
{
CHECK
(
context_
!=
nullptr
)
<<
"Please use set_context first!"
;
return
context_
->
GetContext
();
}
std
::
vector
<
size_t
>
CLHelper
::
DefaultWorkSize
(
const
CLImage
&
image
)
{
cl
::
NDRange
CLHelper
::
DefaultWorkSize
(
const
CLImage
&
image
)
{
// n c h w
auto
image_dim
=
image
.
tensor_dims
();
if
(
image_dim
.
size
()
==
4
)
{
...
...
@@ -52,23 +63,26 @@ std::vector<size_t> CLHelper::DefaultWorkSize(const CLImage &image) {
auto
work_size_0
=
image_width
/
w
;
auto
work_size_1
=
w
;
auto
work_size_2
=
n
*
h
;
return
{
static_cast
<
size_t
>
(
work_size_0
),
static_cast
<
size_t
>
(
work_size_1
),
return
cl
::
NDRange
{
static_cast
<
size_t
>
(
work_size_0
),
static_cast
<
size_t
>
(
work_size_1
),
static_cast
<
size_t
>
(
work_size_2
)};
}
else
if
(
image_dim
.
size
()
==
2
)
{
return
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
image
.
ImageWidth
()),
return
cl
::
NDRange
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
image
.
ImageWidth
()),
static_cast
<
size_t
>
(
image
.
ImageHeight
())};
}
else
if
(
image_dim
.
size
()
==
1
)
{
return
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
image
.
ImageWidth
()),
return
cl
::
NDRange
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
image
.
ImageWidth
()),
static_cast
<
size_t
>
(
1
)};
}
else
if
(
image_dim
.
size
()
==
3
)
{
auto
c
=
image_dim
[
0
];
auto
h
=
image_dim
[
1
];
auto
w
=
image_dim
[
2
];
return
{
static_cast
<
size_t
>
((
c
+
3
)
/
4
),
static_cast
<
size_t
>
(
w
),
return
cl
::
NDRange
{
static_cast
<
size_t
>
((
c
+
3
)
/
4
),
static_cast
<
size_t
>
(
w
),
static_cast
<
size_t
>
(
h
)};
}
else
{
LOG
(
FATAL
)
<<
"Not support this dimension, need to be implemented!"
;
return
{};
return
cl
::
NDRange
{};
}
}
...
...
paddle/fluid/lite/opencl/cl_helper.h
浏览文件 @
a36284ca
...
...
@@ -30,6 +30,8 @@ class CLHelper {
explicit
CLHelper
(
CLContext
*
context
)
:
context_
(
context
)
{}
void
set_context
(
CLContext
*
context
);
void
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
const
std
::
string
&
options
=
""
);
...
...
@@ -39,10 +41,10 @@ class CLHelper {
cl
::
Context
&
OpenCLContext
();
std
::
vector
<
size_t
>
DefaultWorkSize
(
const
CLImage
&
image
);
cl
::
NDRange
DefaultWorkSize
(
const
CLImage
&
image
);
private:
CLContext
*
context_
;
CLContext
*
context_
{
nullptr
}
;
std
::
vector
<
std
::
unique_ptr
<
cl
::
Kernel
>>
kernels
;
};
...
...
paddle/fluid/lite/opencl/cl_image.cc
浏览文件 @
a36284ca
...
...
@@ -27,12 +27,12 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
int
height
=
cl_image
.
image_dims_
[
1
];
half_t
*
image_data
=
new
half_t
[
height
*
width
*
4
];
cl
::
Image
2D
&
image
=
cl_image
.
cl_image
();
cl
::
Image
*
image
=
cl_image
.
cl_image
();
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
static_cast
<
size_t
>
(
height
),
1
};
cl_int
err
=
CLEngine
::
Global
()
->
command_queue
().
enqueueReadImage
(
image
,
CL_TRUE
,
origin
,
region
,
0
,
0
,
image_data
,
nullptr
,
nullptr
);
*
image
,
CL_TRUE
,
origin
,
region
,
0
,
0
,
image_data
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
err
);
float
*
tensor_data
=
new
float
[
cl_image
.
numel
()];
...
...
@@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
return
os
;
}
void
CLImage
::
SetTensorD
ata
(
float
*
tensor_data
,
const
DDim
&
dim
)
{
void
CLImage
::
set_tensor_d
ata
(
float
*
tensor_data
,
const
DDim
&
dim
)
{
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
auto
numel
=
dim
.
product
();
#else
...
...
@@ -65,28 +65,30 @@ void CLImage::SetTensorData(float* tensor_data, const DDim& dim) {
}
void
CLImage
::
InitCLImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call SetTensorData first!"
;
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call "
"set_tensohelper->DefaultWorkSize(out_"
"image)r_data first!"
;
image_converter_
.
reset
(
new
CLImageConverterFolder
);
InitCLImage
(
context
,
image_converter_
.
get
());
}
void
CLImage
::
InitNormalCLImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
SetTensorD
ata first!"
;
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
set_tensor_d
ata first!"
;
image_converter_
.
reset
(
new
CLImageConverterNormal
);
InitCLImage
(
context
,
image_converter_
.
get
());
}
void
CLImage
::
InitNImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
SetTensorD
ata first!"
;
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
set_tensor_d
ata first!"
;
CHECK
(
tensor_dims_
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
image_converter_
.
reset
(
new
CLImageConverterNWBlock
()
);
image_converter_
.
reset
(
new
CLImageConverterNWBlock
);
InitCLImage
(
context
,
image_converter_
.
get
());
}
void
CLImage
::
InitDWImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
SetTensorD
ata first!"
;
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
set_tensor_d
ata first!"
;
CHECK
(
tensor_dims_
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
image_converter_
.
reset
(
new
CLImageConverterDWBlock
()
);
image_converter_
.
reset
(
new
CLImageConverterDWBlock
);
InitCLImage
(
context
,
image_converter_
.
get
());
}
...
...
@@ -95,7 +97,7 @@ void CLImage::InitEmptyImage(const cl::Context& context, const DDim& dim) {
<<
" Empty image tensor data shouldn't have value"
;
tensor_dims_
=
dim
;
image_converter_
.
reset
(
new
CLImageConverterNormal
()
);
image_converter_
.
reset
(
new
CLImageConverterNormal
);
VLOG
(
3
)
<<
" to get image dims "
;
image_dims_
=
image_converter_
->
InitImageDimInfoWith
(
tensor_dims_
);
...
...
@@ -123,7 +125,7 @@ void CLImage::InitEmptyWithImageDim(const cl::Context& context,
void
CLImage
::
InitCLImage
(
const
cl
::
Context
&
context
,
CLImageConverterBase
*
converter
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
SetTensorD
ata first!"
;
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call
set_tensor_d
ata first!"
;
VLOG
(
3
)
<<
" begin init cl image "
;
image_dims_
=
converter
->
InitImageDimInfoWith
(
tensor_dims_
);
...
...
paddle/fluid/lite/opencl/cl_image.h
浏览文件 @
a36284ca
...
...
@@ -33,11 +33,11 @@ class CLImage {
/*
* Will not hold input tensor data, memcpy in this method.
* */
void
SetTensorD
ata
(
float
*
tensor_data
,
const
DDim
&
dim
);
void
set_tensor_d
ata
(
float
*
tensor_data
,
const
DDim
&
dim
);
bool
IsInit
()
{
return
initialized_
;
}
/*
* Need call
SetTensorD
ata first.
* Need call
set_tensor_d
ata first.
* Folder when one dim or two dim.
* */
void
InitCLImage
(
const
cl
::
Context
&
context
);
...
...
@@ -53,7 +53,7 @@ class CLImage {
void
InitEmptyWithImageDim
(
const
cl
::
Context
&
context
,
const
DDim
&
image_dims
);
cl
::
Image
2D
&
cl_image
()
const
{
return
*
cl_image_
;
}
cl
::
Image
*
cl_image
()
const
{
return
cl_image_
.
get
()
;
}
const
DDim
&
image_dims
()
const
{
return
image_dims_
;
}
...
...
@@ -63,7 +63,7 @@ class CLImage {
const
DDim
&
tensor_dims
()
const
{
return
tensor_dims_
;
}
/*
/*
with_da
* Resize original tensor dim.
* */
inline
CLImage
&
Resize
(
const
DDim
&
dims
)
{
...
...
paddle/fluid/lite/opencl/cl_test.cc
浏览文件 @
a36284ca
...
...
@@ -12,10 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
DEFINE_string
(
cl_path
,
"/data/local/tmp/opencl"
,
"The OpenCL kernels path."
);
namespace
paddle
{
namespace
lite
{
...
...
@@ -23,7 +33,7 @@ namespace lite {
TEST
(
cl_test
,
engine_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
"/data/local/tmp/opencl"
);
engine
->
set_cl_path
(
FLAGS_cl_path
);
engine
->
platform
();
engine
->
device
();
engine
->
command_queue
();
...
...
@@ -37,11 +47,108 @@ TEST(cl_test, engine_test) {
TEST
(
cl_test
,
context_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
"/data/local/tmp/opencl"
);
engine
->
set_cl_path
(
FLAGS_cl_path
);
CLContext
context
;
context
.
GetKernel
(
"pool_max"
,
"pool_kernel.cl"
,
""
);
context
.
GetKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
,
""
);
context
.
GetKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
,
""
);
}
TEST
(
cl_test
,
kernel_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
FLAGS_cl_path
);
std
::
unique_ptr
<
CLContext
>
context
(
new
CLContext
);
// std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
std
::
unique_ptr
<
CLHelper
>
helper
(
new
CLHelper
);
helper
->
set_context
(
context
.
get
());
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
auto
kernel
=
helper
->
KernelAt
(
2
);
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
in_data
[
i
]
=
1.
f
;
}
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
CLImage
in_image
;
in_image
.
set_tensor_data
(
in_data
.
get
(),
in_dim
);
in_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
LOG
(
INFO
)
<<
in_image
;
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
bias_data
[
i
]
=
2.
f
;
}
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
CLImage
bias_image
;
bias_image
.
set_tensor_data
(
bias_data
.
get
(),
bias_dim
);
bias_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
LOG
(
INFO
)
<<
bias_image
;
CLImage
out_image
;
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
out_image
.
InitEmptyImage
(
helper
->
OpenCLContext
(),
out_dim
);
LOG
(
INFO
)
<<
out_image
;
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
*
in_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
1
,
*
bias_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
// auto global_work_size = helper->DefaultWorkSize(out_image);
size_t
width
=
in_image
.
ImageWidth
();
size_t
height
=
in_image
.
ImageHeight
();
auto
global_work_size
=
cl
::
NDRange
{
width
,
height
};
cl
::
Event
event
;
status
=
helper
->
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
&
event
);
CL_CHECK_ERRORS
(
status
);
double
start_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
();
double
stop_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
();
double
elapsed_micros
=
(
stop_nanos
-
start_nanos
)
/
1000.0
;
LOG
(
INFO
)
<<
"Kernel Run Cost Time: "
<<
elapsed_micros
<<
" us."
;
LOG
(
INFO
)
<<
out_image
;
}
TEST
(
cl_test
,
elementwise_add_test
)
{
std
::
default_random_engine
engine
;
std
::
uniform_real_distribution
<
float
>
dist
(
-
5
,
5
);
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
in_data
[
i
]
=
dist
(
engine
);
}
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
bias_data
[
i
]
=
dist
(
engine
);
}
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
std
::
unique_ptr
<
float
[]
>
out
(
new
float
[
1024
*
512
]);
bool
status
=
InitOpenCLEngine
(
FLAGS_cl_path
);
CHECK
(
status
)
<<
"Fail to initialize OpenCL engine."
;
CLContext
context
;
elementwise_add
(
&
context
,
in_data
.
get
(),
in_dim
,
bias_data
.
get
(),
bias_dim
,
out
.
get
(),
out_dim
);
int
stride
=
1024
*
512
/
20
;
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
+=
stride
)
{
std
::
cout
<<
out
[
i
]
<<
" "
;
}
std
::
cout
<<
std
::
endl
;
}
}
// namespace lite
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录