Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
44b09240
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
44b09240
编写于
6月 25, 2019
作者:
Z
ZhenWang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add pool2d opencl support.
上级
b067ce50
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
487 addition
and
148 deletion
+487
-148
paddle/fluid/lite/api/paddle_use_kernels.h
paddle/fluid/lite/api/paddle_use_kernels.h
+1
-0
paddle/fluid/lite/kernels/host/feed_compute.cc
paddle/fluid/lite/kernels/host/feed_compute.cc
+0
-2
paddle/fluid/lite/kernels/opencl/CMakeLists.txt
paddle/fluid/lite/kernels/opencl/CMakeLists.txt
+8
-1
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
...fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
+6
-6
paddle/fluid/lite/kernels/opencl/pool_compute.cc
paddle/fluid/lite/kernels/opencl/pool_compute.cc
+67
-0
paddle/fluid/lite/kernels/opencl/pool_compute_test.cc
paddle/fluid/lite/kernels/opencl/pool_compute_test.cc
+123
-0
paddle/fluid/lite/opencl/CMakeLists.txt
paddle/fluid/lite/opencl/CMakeLists.txt
+2
-2
paddle/fluid/lite/opencl/cl_caller.cc
paddle/fluid/lite/opencl/cl_caller.cc
+58
-0
paddle/fluid/lite/opencl/cl_caller.h
paddle/fluid/lite/opencl/cl_caller.h
+5
-0
paddle/fluid/lite/opencl/cl_image_converter.h
paddle/fluid/lite/opencl/cl_image_converter.h
+23
-18
paddle/fluid/lite/opencl/cl_test.cc
paddle/fluid/lite/opencl/cl_test.cc
+76
-3
paddle/fluid/lite/opencl/cl_wrapper.cxx
paddle/fluid/lite/opencl/cl_wrapper.cxx
+118
-116
未找到文件。
paddle/fluid/lite/api/paddle_use_kernels.h
浏览文件 @
44b09240
...
@@ -64,4 +64,5 @@ USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
...
@@ -64,4 +64,5 @@ USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
#ifdef LITE_WITH_OPENCL
#ifdef LITE_WITH_OPENCL
USE_LITE_KERNEL
(
elementwise_add
,
kOpenCL
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kOpenCL
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool2d
,
kOpenCL
,
kFloat
,
kNCHW
,
def
);
#endif
#endif
paddle/fluid/lite/kernels/host/feed_compute.cc
浏览文件 @
44b09240
...
@@ -31,8 +31,6 @@ class FeedCompute
...
@@ -31,8 +31,6 @@ class FeedCompute
VLOG
(
4
)
<<
"col "
<<
param
.
col
;
VLOG
(
4
)
<<
"col "
<<
param
.
col
;
const
lite
::
Tensor
&
feed_item
=
(
*
param
.
feed_list
)[
0
];
const
lite
::
Tensor
&
feed_item
=
(
*
param
.
feed_list
)[
0
];
param
.
out
->
ShareDataWith
(
feed_item
);
param
.
out
->
ShareDataWith
(
feed_item
);
VLOG
(
4
)
<<
"FEED input "
<<
feed_item
<<
" col "
<<
param
.
col
;
VLOG
(
4
)
<<
"FEED output "
<<
*
param
.
out
;
}
}
};
};
...
...
paddle/fluid/lite/kernels/opencl/CMakeLists.txt
浏览文件 @
44b09240
...
@@ -5,12 +5,19 @@ endif()
...
@@ -5,12 +5,19 @@ endif()
set
(
cl_kernel_deps op_params_lite cl_caller cl_engine cl_context cl_wrapper
)
set
(
cl_kernel_deps op_params_lite cl_caller cl_engine cl_context cl_wrapper
)
cc_library
(
elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS
${
cl_kernel_deps
}
)
cc_library
(
elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS
${
cl_kernel_deps
}
)
cc_library
(
pool_opencl SRCS pool_compute.cc DEPS
${
cl_kernel_deps
}
)
lite_cc_test
(
test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc DEPS elementwise_add_opencl
lite_cc_test
(
test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc DEPS elementwise_add_opencl
op_registry_lite program_lite
op_registry_lite program_lite
context_lite
context_lite
)
)
lite_cc_test
(
test_pool_opencl SRCS pool_compute_test.cc DEPS pool_opencl
op_registry_lite program_lite
context_lite
)
set
(
opencl_kernels
set
(
opencl_kernels
elementwise_add_opencl
elementwise_add_opencl
CACHE INTERNAL
""
)
pool_opencl
CACHE INTERNAL
"opencl_kernels"
)
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
浏览文件 @
44b09240
...
@@ -40,23 +40,23 @@ TEST(elementwise_add, init) {
...
@@ -40,23 +40,23 @@ TEST(elementwise_add, init) {
kernel
->
SetParam
(
param
);
kernel
->
SetParam
(
param
);
kernel
->
SetContext
(
std
::
move
(
context
));
kernel
->
SetContext
(
std
::
move
(
context
));
X
.
Resize
({
1
,
1
,
1
,
10
});
X
.
Resize
({
4
,
3
,
10
,
10
});
Y
.
Resize
({
1
,
1
,
1
,
10
});
Y
.
Resize
({
4
,
3
,
10
,
10
});
Out
.
Resize
({
1
,
1
,
1
,
10
});
Out
.
Resize
({
4
,
3
,
10
,
10
});
auto
*
x_data
=
X
.
mutable_data
<
float
>
();
auto
*
x_data
=
X
.
mutable_data
<
float
>
();
auto
*
y_data
=
Y
.
mutable_data
<
float
>
();
auto
*
y_data
=
Y
.
mutable_data
<
float
>
();
auto
*
out_data
=
Out
.
mutable_data
<
float
>
();
auto
*
out_data
=
Out
.
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
*
3
*
10
*
10
;
i
++
)
{
x_data
[
i
]
=
1.1
*
i
;
x_data
[
i
]
=
1.1
*
i
;
y_data
[
i
]
=
2.3
*
i
;
y_data
[
i
]
=
2.3
*
i
;
}
}
kernel
->
Launch
();
kernel
->
Launch
();
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
*
3
*
10
*
10
;
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
3.4
*
i
,
1e-6
);
EXPECT_NEAR
(
out_data
[
i
],
static_cast
<
float
>
(
3.4
*
i
)
,
1e-6
);
}
}
}
}
...
...
paddle/fluid/lite/kernels/opencl/pool_compute.cc
0 → 100644
浏览文件 @
44b09240
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/operators/op_params.h"
// NOTE ugly here, hide these.
#include "paddle/fluid/lite/opencl/cl_caller.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
opencl
{
class
PoolCompute
:
public
KernelLite
<
TARGET
(
kOpenCL
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)
>
{
public:
using
param_t
=
operators
::
PoolParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
in_dims
=
param
.
x
->
dims
();
auto
&
out_dims
=
param
.
output
->
dims
();
const
std
::
string
pooling_type
=
param
.
pooling_type
;
bool
global_pooling
=
param
.
global_pooling
;
std
::
vector
<
int
>&
paddings
=
param
.
paddings
;
std
::
vector
<
int
>&
strides
=
param
.
strides
;
std
::
vector
<
int
>&
ksize
=
param
.
ksize
;
if
(
global_pooling
)
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
paddings
[
i
]
=
0
;
ksize
[
i
]
=
static_cast
<
int
>
(
in_dims
[
i
+
2
]);
}
}
auto
&
context
=
ctx_
->
As
<
OpenClContext
>
();
CHECK
(
context
.
cl_helper
()
!=
nullptr
);
pool
(
context
.
cl_helper
(),
pooling_type
,
paddings
[
0
],
paddings
[
1
],
strides
[
0
],
strides
[
1
],
ksize
[
0
],
ksize
[
1
],
static_cast
<
const
float
*>
(
param
.
x
->
raw_data
()),
in_dims
,
param
.
output
->
mutable_data
<
float
>
(),
out_dims
);
}
};
}
// namespace opencl
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
pool2d
,
kOpenCL
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
opencl
::
PoolCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
Finalize
();
paddle/fluid/lite/kernels/opencl/pool_compute_test.cc
0 → 100644
浏览文件 @
44b09240
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <random>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
void
pool_avg
(
const
int
padding_height
,
const
int
padding_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
float
*
input_data
,
const
DDim
&
in_dim
,
float
*
output_data
,
const
DDim
&
out_dim
)
{
const
int
batch_size
=
in_dim
[
0
];
const
int
input_height
=
in_dim
[
2
];
const
int
input_width
=
in_dim
[
3
];
const
int
output_channels
=
out_dim
[
1
];
const
int
output_height
=
out_dim
[
2
];
const
int
output_width
=
out_dim
[
3
];
const
size_t
input_spatial_size
=
input_height
*
input_width
;
const
size_t
output_spatial_size
=
output_height
*
output_width
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
int
channel
=
i
*
output_channels
+
c
;
const
float
*
input_ptr
=
input_data
+
channel
*
input_spatial_size
;
float
*
output_ptr
=
output_data
+
channel
*
output_spatial_size
;
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
float
val
=
0.
f
;
int
count
=
0
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
val
+=
input_ptr
[
h
*
input_width
+
w
];
++
count
;
}
}
output_ptr
[
ph
*
output_width
+
pw
]
=
(
count
>
0
)
?
val
*
(
1.
f
/
count
)
:
0.
f
;
}
}
}
}
}
TEST
(
pool2d
,
init
)
{
LOG
(
INFO
)
<<
"to get kernel ..."
;
auto
kernels
=
KernelRegistry
::
Global
().
Create
(
"pool2d"
,
TARGET
(
kOpenCL
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
));
ASSERT_FALSE
(
kernels
.
empty
());
auto
kernel
=
std
::
move
(
kernels
.
front
());
LOG
(
INFO
)
<<
"get kernel"
;
lite
::
Tensor
x
,
out
;
operators
::
PoolParam
param
;
param
.
x
=
&
x
;
param
.
output
=
&
out
;
param
.
global_pooling
=
true
;
param
.
pooling_type
=
"avg"
;
param
.
paddings
=
std
::
vector
<
int
>
{
0
,
0
};
param
.
strides
=
std
::
vector
<
int
>
{
1
,
1
};
param
.
ksize
=
std
::
vector
<
int
>
{
7
,
7
};
std
::
unique_ptr
<
KernelContext
>
context
(
new
KernelContext
);
context
->
As
<
OpenClContext
>
().
InitOnce
();
kernel
->
SetParam
(
param
);
kernel
->
SetContext
(
std
::
move
(
context
));
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
1024
,
7
,
7
});
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
1024
,
1
,
1
});
x
.
Resize
(
in_dim
);
out
.
Resize
(
out_dim
);
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
out_data
=
out
.
mutable_data
<
float
>
();
std
::
default_random_engine
engine
;
std
::
uniform_real_distribution
<
float
>
dist
(
-
5
,
5
);
for
(
int
i
=
0
;
i
<
4
*
1024
*
7
*
7
;
i
++
)
{
x_data
[
i
]
=
dist
(
engine
);
}
kernel
->
Launch
();
std
::
unique_ptr
<
float
[]
>
out_ref
(
new
float
[
4
*
1024
*
1
*
1
]);
pool_avg
(
0
,
0
,
1
,
1
,
7
,
7
,
x_data
,
in_dim
,
out_ref
.
get
(),
out_dim
);
for
(
int
i
=
0
;
i
<
4
*
1024
*
1
*
1
;
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
out_ref
[
i
],
1e-6
);
}
}
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
pool2d
,
kOpenCL
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/opencl/CMakeLists.txt
浏览文件 @
44b09240
...
@@ -2,8 +2,8 @@ if (NOT LITE_WITH_OPENCL)
...
@@ -2,8 +2,8 @@ if (NOT LITE_WITH_OPENCL)
return
()
return
()
endif
()
endif
()
cc_library
(
cl_wrapper SRCS cl_wrapper.c
c
)
cc_library
(
cl_wrapper SRCS cl_wrapper.c
xx
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
cc_library
(
cl_tool SRCS cl_tool.cc
DEPS cl_wrapper
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
...
...
paddle/fluid/lite/opencl/cl_caller.cc
浏览文件 @
44b09240
...
@@ -19,6 +19,7 @@ limitations under the License. */
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
#include "paddle/fluid/lite/utils/string.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -94,5 +95,62 @@ void elementwise_add(CLHelper* helper, const float* in, const DDim& in_dim,
...
@@ -94,5 +95,62 @@ void elementwise_add(CLHelper* helper, const float* in, const DDim& in_dim,
CopyImageData
(
helper
,
out_image
,
out
);
CopyImageData
(
helper
,
out_image
,
out
);
}
}
void
pool
(
CLHelper
*
helper
,
const
std
::
string
pooling_type
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
ksize_h
,
const
int
ksize_w
,
const
float
*
in
,
const
DDim
&
in_dim
,
float
*
out
,
const
DDim
&
out_dim
)
{
auto
kernel
=
helper
->
GetKernel
(
string_format
(
"pool_%s"
,
pooling_type
.
c_str
()));
CLImage
in_image
;
in_image
.
set_tensor_data
(
in
,
in_dim
);
in_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
VLOG
(
3
)
<<
" --- Inpu image: "
<<
in_image
<<
" --- "
;
CLImage
out_image
;
out_image
.
InitEmptyImage
(
helper
->
OpenCLContext
(),
out_dim
);
auto
global_work_size
=
helper
->
DefaultWorkSize
(
out_image
);
auto
*
in_converter
=
dynamic_cast
<
CLImageConverterNormal
*>
(
in_image
.
image_converter
());
auto
*
out_converter
=
dynamic_cast
<
CLImageConverterNormal
*>
(
out_image
.
image_converter
());
const
int
in_height
=
in_converter
->
HeightOfOneBlock
();
const
int
in_width
=
in_converter
->
WidthOfOneBlock
();
const
int
out_height
=
out_converter
->
HeightOfOneBlock
();
const
int
out_width
=
out_converter
->
WidthOfOneBlock
();
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
in_height
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
1
,
in_width
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
2
,
out_height
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
3
,
out_width
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
4
,
pad_h
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
5
,
pad_w
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
6
,
stride_h
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
7
,
stride_w
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
8
,
ksize_h
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
9
,
ksize_w
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
10
,
*
in_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
11
,
*
out_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
helper
->
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
status
);
status
=
helper
->
OpenCLCommandQueue
().
finish
();
CL_CHECK_ERRORS
(
status
);
VLOG
(
3
)
<<
" --- Out image: "
<<
out_image
<<
" --- "
;
CopyImageData
(
helper
,
out_image
,
out
);
}
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
paddle/fluid/lite/opencl/cl_caller.h
浏览文件 @
44b09240
...
@@ -31,5 +31,10 @@ void elementwise_add(CLHelper* helper, const float* in, const DDim& in_dim,
...
@@ -31,5 +31,10 @@ void elementwise_add(CLHelper* helper, const float* in, const DDim& in_dim,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
);
const
DDim
&
out_dim
);
void
pool
(
CLHelper
*
helper
,
const
std
::
string
pooling_type
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
ksize_h
,
const
int
ksize_w
,
const
float
*
in
,
const
DDim
&
in_dim
,
float
*
out
,
const
DDim
&
out_dim
);
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image_converter.h
浏览文件 @
44b09240
...
@@ -33,18 +33,19 @@ class CLImageConverterBase {
...
@@ -33,18 +33,19 @@ class CLImageConverterBase {
class
CLImageConverterDefault
:
public
CLImageConverterBase
{
class
CLImageConverterDefault
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
override
;
void
NCHWToImage
(
float
*
nchw
,
float
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
nchw
,
float
*
image
,
const
DDim
&
tensor_dim
)
override
;
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
)
override
;
};
};
class
CLImageConverterFolder
:
public
CLImageConverterBase
{
class
CLImageConverterFolder
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
override
;
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
)
override
;
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
)
override
;
/*
/*
* width of original tensor
* width of original tensor
...
@@ -66,10 +67,11 @@ class CLImageConverterFolder : public CLImageConverterBase {
...
@@ -66,10 +67,11 @@ class CLImageConverterFolder : public CLImageConverterBase {
class
CLImageConverterNormal
:
public
CLImageConverterBase
{
class
CLImageConverterNormal
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
override
;
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
)
override
;
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
)
override
;
/*
/*
* width of original tensor
* width of original tensor
...
@@ -90,24 +92,27 @@ class CLImageConverterNormal : public CLImageConverterBase {
...
@@ -90,24 +92,27 @@ class CLImageConverterNormal : public CLImageConverterBase {
};
};
class
CLImageConverterNWBlock
:
public
CLImageConverterBase
{
class
CLImageConverterNWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
override
;
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
)
override
;
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
)
override
;
};
};
class
CLImageConverterDWBlock
:
public
CLImageConverterBase
{
class
CLImageConverterDWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
override
;
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
)
override
;
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
)
override
;
};
};
class
CLImageConverterWinoTransWeight
:
public
CLImageConverterBase
{
class
CLImageConverterWinoTransWeight
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
override
;
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
float
*
image
,
const
DDim
&
tensor_dim
)
override
;
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
float
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
)
override
;
};
};
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/opencl/cl_test.cc
浏览文件 @
44b09240
...
@@ -160,12 +160,11 @@ TEST(cl_test, channel_add_test) {
...
@@ -160,12 +160,11 @@ TEST(cl_test, channel_add_test) {
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
+=
stride
)
{
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
+=
stride
)
{
std
::
cout
<<
out
[
i
]
<<
" "
;
std
::
cout
<<
out
[
i
]
<<
" "
;
}
}
std
::
cout
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
EXPECT_NEAR
(
out
[
i
],
out_ref
[
i
],
1e-6
);
EXPECT_NEAR
(
out
[
i
],
out_ref
[
i
],
1e-6
);
}
}
std
::
cout
<<
std
::
endl
;
}
}
TEST
(
cl_test
,
elementwise_add_test
)
{
TEST
(
cl_test
,
elementwise_add_test
)
{
...
@@ -205,12 +204,86 @@ TEST(cl_test, elementwise_add_test) {
...
@@ -205,12 +204,86 @@ TEST(cl_test, elementwise_add_test) {
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
+=
stride
)
{
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
+=
stride
)
{
std
::
cout
<<
out
[
i
]
<<
" "
;
std
::
cout
<<
out
[
i
]
<<
" "
;
}
}
std
::
cout
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
EXPECT_NEAR
(
out
[
i
],
out_ref
[
i
],
1e-6
);
EXPECT_NEAR
(
out
[
i
],
out_ref
[
i
],
1e-6
);
}
}
}
std
::
cout
<<
std
::
endl
;
void
pool_avg
(
const
int
padding_height
,
const
int
padding_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
ksize_height
,
const
int
ksize_width
,
const
float
*
input_data
,
const
DDim
&
in_dim
,
float
*
output_data
,
const
DDim
&
out_dim
)
{
const
int
batch_size
=
in_dim
[
0
];
const
int
input_height
=
in_dim
[
2
];
const
int
input_width
=
in_dim
[
3
];
const
int
output_channels
=
out_dim
[
1
];
const
int
output_height
=
out_dim
[
2
];
const
int
output_width
=
out_dim
[
3
];
const
size_t
input_spatial_size
=
input_height
*
input_width
;
const
size_t
output_spatial_size
=
output_height
*
output_width
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
int
channel
=
i
*
output_channels
+
c
;
const
float
*
input_ptr
=
input_data
+
channel
*
input_spatial_size
;
float
*
output_ptr
=
output_data
+
channel
*
output_spatial_size
;
for
(
int
ph
=
0
;
ph
<
output_height
;
++
ph
)
{
int
hstart
=
ph
*
stride_height
-
padding_height
;
int
hend
=
std
::
min
(
hstart
+
ksize_height
,
input_height
);
hstart
=
std
::
max
(
hstart
,
0
);
for
(
int
pw
=
0
;
pw
<
output_width
;
++
pw
)
{
int
wstart
=
pw
*
stride_width
-
padding_width
;
int
wend
=
std
::
min
(
wstart
+
ksize_width
,
input_width
);
wstart
=
std
::
max
(
wstart
,
0
);
float
val
=
0.
f
;
int
count
=
0
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
val
+=
input_ptr
[
h
*
input_width
+
w
];
++
count
;
}
}
output_ptr
[
ph
*
output_width
+
pw
]
=
(
count
>
0
)
?
val
*
(
1.
f
/
count
)
:
0.
f
;
}
}
}
}
}
TEST
(
cl_test
,
pool_test
)
{
std
::
default_random_engine
engine
;
std
::
uniform_real_distribution
<
float
>
dist
(
-
5
,
5
);
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
1024
,
7
,
7
});
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
4
*
1024
*
7
*
7
]);
for
(
int
i
=
0
;
i
<
4
*
1024
*
7
*
7
;
i
++
)
{
in_data
[
i
]
=
dist
(
engine
);
}
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
1024
,
1
,
1
});
std
::
unique_ptr
<
float
[]
>
out
(
new
float
[
4
*
1024
*
1
*
1
]);
std
::
unique_ptr
<
float
[]
>
out_ref
(
new
float
[
4
*
1024
*
1
*
1
]);
bool
status
=
InitOpenCLEngine
(
FLAGS_cl_path
);
CHECK
(
status
)
<<
"Fail to initialize OpenCL engine."
;
std
::
unique_ptr
<
CLContext
>
context
(
new
CLContext
);
std
::
unique_ptr
<
CLHelper
>
helper
(
new
CLHelper
(
context
.
get
()));
helper
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
helper
->
AddKernel
(
"pool_avg"
,
"pool_kernel.cl"
);
pool
(
helper
.
get
(),
"avg"
,
0
,
0
,
1
,
1
,
7
,
7
,
in_data
.
get
(),
in_dim
,
out
.
get
(),
out_dim
);
pool_avg
(
0
,
0
,
1
,
1
,
7
,
7
,
in_data
.
get
(),
in_dim
,
out_ref
.
get
(),
out_dim
);
for
(
int
i
=
0
;
i
<
4
*
1024
*
1
*
1
;
i
++
)
{
EXPECT_NEAR
(
out
[
i
],
out_ref
[
i
],
1e-6
);
}
}
}
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/opencl/cl_wrapper.c
c
→
paddle/fluid/lite/opencl/cl_wrapper.c
xx
浏览文件 @
44b09240
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
You may obtain a copy of the License at
// You may obtain a copy of the License at
//
http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
Unless required by applicable law or agreed to in writing, software
// Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
// distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
limitations under the License. */
// limitations under the License.
// This file is borrowed from MACE, and we will refactor it
// in the near future.
#include <dlfcn.h>
#include <dlfcn.h>
#include <glog/logging.h>
#include <glog/logging.h>
...
@@ -157,58 +159,58 @@ class OpenCLLibrary final {
...
@@ -157,58 +159,58 @@ class OpenCLLibrary final {
using
clGetImageInfoFunc
=
cl_int
(
*
)(
cl_mem
,
cl_image_info
,
size_t
,
void
*
,
using
clGetImageInfoFunc
=
cl_int
(
*
)(
cl_mem
,
cl_image_info
,
size_t
,
void
*
,
size_t
*
);
size_t
*
);
#define
PADDLE_
CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr
#define CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetPlatformIDs
);
CL_DEFINE_FUNC_PTR
(
clGetPlatformIDs
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetPlatformInfo
);
CL_DEFINE_FUNC_PTR
(
clGetPlatformInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clBuildProgram
);
CL_DEFINE_FUNC_PTR
(
clBuildProgram
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clEnqueueNDRangeKernel
);
CL_DEFINE_FUNC_PTR
(
clEnqueueNDRangeKernel
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clSetKernelArg
);
CL_DEFINE_FUNC_PTR
(
clSetKernelArg
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clReleaseKernel
);
CL_DEFINE_FUNC_PTR
(
clReleaseKernel
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateProgramWithSource
);
CL_DEFINE_FUNC_PTR
(
clCreateProgramWithSource
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateBuffer
);
CL_DEFINE_FUNC_PTR
(
clCreateBuffer
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateImage
);
CL_DEFINE_FUNC_PTR
(
clCreateImage
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateImage2D
);
CL_DEFINE_FUNC_PTR
(
clCreateImage2D
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateUserEvent
);
CL_DEFINE_FUNC_PTR
(
clCreateUserEvent
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clRetainKernel
);
CL_DEFINE_FUNC_PTR
(
clRetainKernel
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateKernel
);
CL_DEFINE_FUNC_PTR
(
clCreateKernel
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetProgramInfo
);
CL_DEFINE_FUNC_PTR
(
clGetProgramInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clFlush
);
CL_DEFINE_FUNC_PTR
(
clFlush
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clFinish
);
CL_DEFINE_FUNC_PTR
(
clFinish
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clReleaseProgram
);
CL_DEFINE_FUNC_PTR
(
clReleaseProgram
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clRetainContext
);
CL_DEFINE_FUNC_PTR
(
clRetainContext
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetContextInfo
);
CL_DEFINE_FUNC_PTR
(
clGetContextInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateProgramWithBinary
);
CL_DEFINE_FUNC_PTR
(
clCreateProgramWithBinary
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateCommandQueue
);
CL_DEFINE_FUNC_PTR
(
clCreateCommandQueue
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateCommandQueueWithProperties
);
CL_DEFINE_FUNC_PTR
(
clCreateCommandQueueWithProperties
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clReleaseCommandQueue
);
CL_DEFINE_FUNC_PTR
(
clReleaseCommandQueue
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clEnqueueMapBuffer
);
CL_DEFINE_FUNC_PTR
(
clEnqueueMapBuffer
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clEnqueueMapImage
);
CL_DEFINE_FUNC_PTR
(
clEnqueueMapImage
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clRetainProgram
);
CL_DEFINE_FUNC_PTR
(
clRetainProgram
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetProgramBuildInfo
);
CL_DEFINE_FUNC_PTR
(
clGetProgramBuildInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clEnqueueReadBuffer
);
CL_DEFINE_FUNC_PTR
(
clEnqueueReadBuffer
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clEnqueueReadImage
);
CL_DEFINE_FUNC_PTR
(
clEnqueueReadImage
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clEnqueueWriteBuffer
);
CL_DEFINE_FUNC_PTR
(
clEnqueueWriteBuffer
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clWaitForEvents
);
CL_DEFINE_FUNC_PTR
(
clWaitForEvents
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clReleaseEvent
);
CL_DEFINE_FUNC_PTR
(
clReleaseEvent
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateContext
);
CL_DEFINE_FUNC_PTR
(
clCreateContext
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clCreateContextFromType
);
CL_DEFINE_FUNC_PTR
(
clCreateContextFromType
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clReleaseContext
);
CL_DEFINE_FUNC_PTR
(
clReleaseContext
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clRetainCommandQueue
);
CL_DEFINE_FUNC_PTR
(
clRetainCommandQueue
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clEnqueueUnmapMemObject
);
CL_DEFINE_FUNC_PTR
(
clEnqueueUnmapMemObject
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clRetainMemObject
);
CL_DEFINE_FUNC_PTR
(
clRetainMemObject
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clReleaseMemObject
);
CL_DEFINE_FUNC_PTR
(
clReleaseMemObject
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetDeviceInfo
);
CL_DEFINE_FUNC_PTR
(
clGetDeviceInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetDeviceIDs
);
CL_DEFINE_FUNC_PTR
(
clGetDeviceIDs
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clRetainDevice
);
CL_DEFINE_FUNC_PTR
(
clRetainDevice
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clReleaseDevice
);
CL_DEFINE_FUNC_PTR
(
clReleaseDevice
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clRetainEvent
);
CL_DEFINE_FUNC_PTR
(
clRetainEvent
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetKernelWorkGroupInfo
);
CL_DEFINE_FUNC_PTR
(
clGetKernelWorkGroupInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetEventInfo
);
CL_DEFINE_FUNC_PTR
(
clGetEventInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetEventProfilingInfo
);
CL_DEFINE_FUNC_PTR
(
clGetEventProfilingInfo
);
PADDLE_
CL_DEFINE_FUNC_PTR
(
clGetImageInfo
);
CL_DEFINE_FUNC_PTR
(
clGetImageInfo
);
#undef
PADDLE_
CL_DEFINE_FUNC_PTR
#undef CL_DEFINE_FUNC_PTR
private:
private:
void
*
handle_
=
nullptr
;
void
*
handle_
=
nullptr
;
...
@@ -285,7 +287,7 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) {
...
@@ -285,7 +287,7 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) {
return
nullptr
;
return
nullptr
;
}
}
#define
PADDLE_CL_ASSIGN_FROM_DLSYM(func)
\
#define
CL_ASSIGN_FROM_DLSYM(func)
\
do { \
do { \
void *ptr = dlsym(handle, #func); \
void *ptr = dlsym(handle, #func); \
if (ptr == nullptr) { \
if (ptr == nullptr) { \
...
@@ -296,56 +298,56 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) {
...
@@ -296,56 +298,56 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) {
VLOG(3) << "Loaded " << #func << " from " << path; \
VLOG(3) << "Loaded " << #func << " from " << path; \
} while (false)
} while (false)
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetPlatformIDs
);
CL_ASSIGN_FROM_DLSYM
(
clGetPlatformIDs
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetPlatformInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetPlatformInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clBuildProgram
);
CL_ASSIGN_FROM_DLSYM
(
clBuildProgram
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clEnqueueNDRangeKernel
);
CL_ASSIGN_FROM_DLSYM
(
clEnqueueNDRangeKernel
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clSetKernelArg
);
CL_ASSIGN_FROM_DLSYM
(
clSetKernelArg
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clReleaseKernel
);
CL_ASSIGN_FROM_DLSYM
(
clReleaseKernel
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateProgramWithSource
);
CL_ASSIGN_FROM_DLSYM
(
clCreateProgramWithSource
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateBuffer
);
CL_ASSIGN_FROM_DLSYM
(
clCreateBuffer
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateImage
);
CL_ASSIGN_FROM_DLSYM
(
clCreateImage
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateImage2D
);
CL_ASSIGN_FROM_DLSYM
(
clCreateImage2D
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateUserEvent
);
CL_ASSIGN_FROM_DLSYM
(
clCreateUserEvent
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clRetainKernel
);
CL_ASSIGN_FROM_DLSYM
(
clRetainKernel
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateKernel
);
CL_ASSIGN_FROM_DLSYM
(
clCreateKernel
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetProgramInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetProgramInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clFlush
);
CL_ASSIGN_FROM_DLSYM
(
clFlush
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clFinish
);
CL_ASSIGN_FROM_DLSYM
(
clFinish
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clReleaseProgram
);
CL_ASSIGN_FROM_DLSYM
(
clReleaseProgram
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clRetainContext
);
CL_ASSIGN_FROM_DLSYM
(
clRetainContext
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetContextInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetContextInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateProgramWithBinary
);
CL_ASSIGN_FROM_DLSYM
(
clCreateProgramWithBinary
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateCommandQueue
);
CL_ASSIGN_FROM_DLSYM
(
clCreateCommandQueue
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateCommandQueueWithProperties
);
CL_ASSIGN_FROM_DLSYM
(
clCreateCommandQueueWithProperties
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clReleaseCommandQueue
);
CL_ASSIGN_FROM_DLSYM
(
clReleaseCommandQueue
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clEnqueueMapBuffer
);
CL_ASSIGN_FROM_DLSYM
(
clEnqueueMapBuffer
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clEnqueueMapImage
);
CL_ASSIGN_FROM_DLSYM
(
clEnqueueMapImage
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clRetainProgram
);
CL_ASSIGN_FROM_DLSYM
(
clRetainProgram
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetProgramBuildInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetProgramBuildInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clEnqueueReadBuffer
);
CL_ASSIGN_FROM_DLSYM
(
clEnqueueReadBuffer
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clEnqueueReadImage
);
CL_ASSIGN_FROM_DLSYM
(
clEnqueueReadImage
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clEnqueueWriteBuffer
);
CL_ASSIGN_FROM_DLSYM
(
clEnqueueWriteBuffer
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clWaitForEvents
);
CL_ASSIGN_FROM_DLSYM
(
clWaitForEvents
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clReleaseEvent
);
CL_ASSIGN_FROM_DLSYM
(
clReleaseEvent
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateContext
);
CL_ASSIGN_FROM_DLSYM
(
clCreateContext
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clCreateContextFromType
);
CL_ASSIGN_FROM_DLSYM
(
clCreateContextFromType
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clReleaseContext
);
CL_ASSIGN_FROM_DLSYM
(
clReleaseContext
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clRetainCommandQueue
);
CL_ASSIGN_FROM_DLSYM
(
clRetainCommandQueue
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clEnqueueUnmapMemObject
);
CL_ASSIGN_FROM_DLSYM
(
clEnqueueUnmapMemObject
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clRetainMemObject
);
CL_ASSIGN_FROM_DLSYM
(
clRetainMemObject
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clReleaseMemObject
);
CL_ASSIGN_FROM_DLSYM
(
clReleaseMemObject
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetDeviceInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetDeviceInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetDeviceIDs
);
CL_ASSIGN_FROM_DLSYM
(
clGetDeviceIDs
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clRetainDevice
);
CL_ASSIGN_FROM_DLSYM
(
clRetainDevice
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clReleaseDevice
);
CL_ASSIGN_FROM_DLSYM
(
clReleaseDevice
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clRetainEvent
);
CL_ASSIGN_FROM_DLSYM
(
clRetainEvent
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetKernelWorkGroupInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetKernelWorkGroupInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetEventInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetEventInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetEventProfilingInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetEventProfilingInfo
);
PADDLE_
CL_ASSIGN_FROM_DLSYM
(
clGetImageInfo
);
CL_ASSIGN_FROM_DLSYM
(
clGetImageInfo
);
#undef
PADDLE_
CL_ASSIGN_FROM_DLSYM
#undef CL_ASSIGN_FROM_DLSYM
return
handle
;
return
handle
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录