Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
b92e9f9a
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b92e9f9a
编写于
2月 25, 2020
作者:
Y
Yuan Shuai
提交者:
GitHub
2月 25, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Replace uint16_t with half_t. test=develop (#2996)
上级
e68b36f5
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
213 addition
and
210 deletion
+213
-210
lite/kernels/opencl/CMakeLists.txt
lite/kernels/opencl/CMakeLists.txt
+2
-2
lite/kernels/opencl/activation_image_compute.cc
lite/kernels/opencl/activation_image_compute.cc
+11
-11
lite/kernels/opencl/activation_image_compute_test.cc
lite/kernels/opencl/activation_image_compute_test.cc
+7
-9
lite/kernels/opencl/concat_image_compute.cc
lite/kernels/opencl/concat_image_compute.cc
+5
-4
lite/kernels/opencl/concat_image_compute_test.cc
lite/kernels/opencl/concat_image_compute_test.cc
+3
-3
lite/kernels/opencl/conv_image_compute.cc
lite/kernels/opencl/conv_image_compute.cc
+49
-49
lite/kernels/opencl/conv_image_compute.h
lite/kernels/opencl/conv_image_compute.h
+1
-0
lite/kernels/opencl/conv_image_compute_test.cc
lite/kernels/opencl/conv_image_compute_test.cc
+44
-46
lite/kernels/opencl/depthwise_conv2d_image_compute_test.cc
lite/kernels/opencl/depthwise_conv2d_image_compute_test.cc
+22
-23
lite/kernels/opencl/elementwise_add_image_compute.cc
lite/kernels/opencl/elementwise_add_image_compute.cc
+4
-4
lite/kernels/opencl/elementwise_add_image_compute.h
lite/kernels/opencl/elementwise_add_image_compute.h
+1
-0
lite/kernels/opencl/elementwise_add_image_compute_test.cc
lite/kernels/opencl/elementwise_add_image_compute_test.cc
+9
-9
lite/kernels/opencl/elementwise_mul_image_compute.cc
lite/kernels/opencl/elementwise_mul_image_compute.cc
+5
-4
lite/kernels/opencl/elementwise_mul_image_compute_test.cc
lite/kernels/opencl/elementwise_mul_image_compute_test.cc
+9
-9
lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc
...opencl/fusion_elementwise_add_activation_image_compute.cc
+1
-0
lite/kernels/opencl/layout_compute.cc
lite/kernels/opencl/layout_compute.cc
+3
-2
lite/kernels/opencl/layout_compute_test.cc
lite/kernels/opencl/layout_compute_test.cc
+1
-1
lite/kernels/opencl/nearest_interp_image_compute.cc
lite/kernels/opencl/nearest_interp_image_compute.cc
+7
-7
lite/kernels/opencl/nearest_interp_image_compute_test.cc
lite/kernels/opencl/nearest_interp_image_compute_test.cc
+2
-2
lite/kernels/opencl/pool_image_compute.cc
lite/kernels/opencl/pool_image_compute.cc
+3
-2
lite/kernels/opencl/pool_image_compute_test.cc
lite/kernels/opencl/pool_image_compute_test.cc
+6
-7
lite/kernels/opencl/reshape_image_compute.cc
lite/kernels/opencl/reshape_image_compute.cc
+3
-2
lite/kernels/opencl/reshape_image_compute_test.cc
lite/kernels/opencl/reshape_image_compute_test.cc
+7
-7
lite/kernels/opencl/scale_image_compute.cc
lite/kernels/opencl/scale_image_compute.cc
+3
-2
lite/kernels/opencl/scale_image_compute_test.cc
lite/kernels/opencl/scale_image_compute_test.cc
+5
-5
未找到文件。
lite/kernels/opencl/CMakeLists.txt
浏览文件 @
b92e9f9a
...
...
@@ -131,8 +131,8 @@ lite_cc_test(test_mul_buffer_opencl SRCS mul_buffer_compute_test.cc
ARGS --cl_path=
${
CMAKE_SOURCE_DIR
}
/lite/backends/opencl
)
#lite_cc_test(test_elementwise_add_buffer_opencl SRCS elementwise_add__buffer_compute_test.cc
# DEPS elementwise_add_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
#
DEPS elementwise_add_opencl op_registry program context
#
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test
(
test_io_copy_buffer_opencl SRCS io_copy_buffer_compute_test.cc
DEPS io_copy_opencl op_registry program context
...
...
lite/kernels/opencl/activation_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
...
...
@@ -43,9 +44,9 @@ class ReluComputeImageDefault : public KernelLite<TARGET(kOpenCL),
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
const
auto
&
x_dims
=
param
.
X
->
dims
();
auto
*
x_buf
=
param
.
X
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_buf
=
param
.
X
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
image_shape
=
InitImageDimInfoWith
(
x_dims
);
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
const
auto
&
y_dims
=
param
.
Out
->
dims
();
// useless: check dim only
...
...
@@ -111,9 +112,9 @@ class Relu6ComputeImageDefault : public KernelLite<TARGET(kOpenCL),
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
const
auto
&
x_dims
=
param
.
X
->
dims
();
auto
*
x_buf
=
param
.
X
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_buf
=
param
.
X
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
image_shape
=
InitImageDimInfoWith
(
x_dims
);
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
const
auto
&
y_dims
=
param
.
Out
->
dims
();
// useless: check dim only
auto
threshold
=
param
.
Relu_clipped_coef
;
...
...
@@ -185,14 +186,13 @@ class SigmoidComputeImageDefault
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
const
auto
&
x_dims
=
param
.
X
->
dims
();
auto
*
x_buf
=
param
.
X
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
// use
uint16
_t represents half float
param
.
X
->
data
<
half
_t
,
cl
::
Image2D
>
();
// use
half
_t represents half float
auto
image_shape
=
InitImageDimInfoWith
(
x_dims
);
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
uint16_t
,
cl
::
Image2D
>
(
// use uint16_t
// represents half float
image_shape
[
"width"
],
image_shape
[
"height"
]);
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
half_t
,
cl
::
Image2D
>
(
// use half_t
// represents half float
image_shape
[
"width"
],
image_shape
[
"height"
]);
const
auto
&
y_dims
=
param
.
Out
->
dims
();
// useless: check dim only
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
...
...
lite/kernels/opencl/activation_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -133,9 +133,9 @@ TEST(relu_image2d_fp16, compute) {
mapped_x
[
i
]
=
static_cast
<
int
>
(
i
)
-
x_dim
.
production
()
/
2
;
mapped_y
[
i
]
=
static_cast
<
int
>
(
0
);
}
auto
*
relu_in_data
=
relu_in
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
relu_in_data
=
relu_in
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
relu_image2d_shape
[
"width"
],
relu_image2d_shape
[
"height"
]);
auto
*
relu_out_data
=
relu_out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
relu_out_data
=
relu_out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
relu_image2d_shape
[
"width"
],
relu_image2d_shape
[
"height"
]);
// set context and kernel args
...
...
@@ -290,9 +290,9 @@ TEST(relu6_image2d_fp16, compute) {
mapped_x
[
i
]
=
static_cast
<
int
>
(
i
)
-
x_dim
.
production
()
/
2
;
mapped_y
[
i
]
=
static_cast
<
int
>
(
0
);
}
auto
*
relu_in_data
=
relu_in
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
relu_in_data
=
relu_in
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
relu_image2d_shape
[
"width"
],
relu_image2d_shape
[
"height"
]);
auto
*
relu_out_data
=
relu_out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
relu_out_data
=
relu_out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
relu_image2d_shape
[
"width"
],
relu_image2d_shape
[
"height"
]);
// set context and kernel args
...
...
@@ -447,12 +447,10 @@ TEST(sigmoid_image2d_fp16, compute) {
for
(
int
i
=
0
;
i
<
x_dim
.
production
();
++
i
)
{
mapped_x
[
i
]
=
static_cast
<
float
>
(
dist
(
engine
));
}
auto
*
sigmoid_in_data
=
sigmoid_in
.
mutable_data
<
uint16_t
,
cl
::
Image2D
>
(
sigmoid_image2d_shape
[
"width"
],
sigmoid_image2d_shape
[
"height"
]);
auto
*
sigmoid_in_data
=
sigmoid_in
.
mutable_data
<
half_t
,
cl
::
Image2D
>
(
sigmoid_image2d_shape
[
"width"
],
sigmoid_image2d_shape
[
"height"
]);
auto
*
sigmoid_out_data
=
sigmoid_out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
sigmoid_out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
sigmoid_image2d_shape
[
"width"
],
sigmoid_image2d_shape
[
"height"
]);
...
...
lite/kernels/opencl/concat_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
...
...
@@ -80,7 +81,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
const
auto
&
x_dims
=
param
.
output
->
dims
();
auto
image_shape
=
InitImageDimInfoWith
(
x_dims
);
auto
*
out_buf
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_buf
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
const
auto
&
y_dims
=
param
.
output
->
dims
();
// useless: check dim only
...
...
@@ -124,8 +125,8 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
printf
(
"this axis: %d does not support
\n
"
,
axis_
);
}
if
(
inputs
.
size
()
==
2
)
{
auto
*
x_buf0
=
inputs
[
0
]
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_buf1
=
inputs
[
1
]
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_buf0
=
inputs
[
0
]
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
x_buf1
=
inputs
[
1
]
->
data
<
half
_t
,
cl
::
Image2D
>
();
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
x_buf1
);
...
...
@@ -152,7 +153,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto
start
=
0
;
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
arg_idx
=
0
;
auto
*
x_buf
=
inputs
[
i
]
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_buf
=
inputs
[
i
]
->
data
<
half
_t
,
cl
::
Image2D
>
();
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
...
...
lite/kernels/opencl/concat_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -192,15 +192,15 @@ TEST(concat_image2d, compute) {
mapped_y
[
i
]
=
static_cast
<
int
>
(
0
);
}
auto
*
concat_in_data0
=
concat_in0
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
concat_in0
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
concat_image2d_shape_in0
[
"width"
],
concat_image2d_shape_in0
[
"height"
]);
auto
*
concat_in_data1
=
concat_in1
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
concat_in1
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
concat_image2d_shape_in1
[
"width"
],
concat_image2d_shape_in1
[
"height"
]);
auto
*
concat_out_data
=
concat_out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
concat_out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
concat_image2d_shape
[
"width"
],
concat_image2d_shape
[
"height"
]);
...
...
lite/kernels/opencl/conv_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -85,10 +85,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterNWBlock
converter
;
const
DDim
&
filter_image_dims
=
converter
.
InitImageDimInfoWith
(
filter_dims
);
std
::
vector
<
uint16
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
converter
.
NCHWToImage
(
filter_cpu
,
filter_image_v
.
data
(),
filter_dims
);
filter_gpu_image_
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
filter_gpu_image_
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_dims
[
0
],
filter_image_dims
[
1
],
filter_image_v
.
data
());
impl_
=
&
ConvImageCompute
::
Conv2d1x1
;
...
...
@@ -109,10 +109,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterNWBlock
converter
;
const
DDim
&
filter_image_dims
=
converter
.
InitImageDimInfoWith
(
filter_dims
);
std
::
vector
<
uint16
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
converter
.
NCHWToImage
(
filter_cpu
,
filter_image_v
.
data
(),
filter_dims
);
filter_gpu_image_
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
filter_gpu_image_
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_dims
[
0
],
filter_image_dims
[
1
],
filter_image_v
.
data
());
}
else
if
(
filter_dims
[
1
]
==
1
&&
x_dims
[
1
]
==
output_dims
[
1
]
&&
kernel_h
!=
3
)
{
...
...
@@ -123,10 +123,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterNWBlock
converter
;
const
DDim
&
filter_image_dims
=
converter
.
InitImageDimInfoWith
(
filter_dims
);
std
::
vector
<
uint16
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
converter
.
NCHWToImage
(
filter_cpu
,
filter_image_v
.
data
(),
filter_dims
);
filter_gpu_image_
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
filter_gpu_image_
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_dims
[
0
],
filter_image_dims
[
1
],
filter_image_v
.
data
());
impl_
=
&
ConvImageCompute
::
DepthwiseConv2d
;
...
...
@@ -137,10 +137,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder
converter
;
const
DDim
&
filter_image_dims
=
converter
.
InitImageDimInfoWith
(
filter_dims
);
std
::
vector
<
uint16
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
converter
.
NCHWToImage
(
filter_cpu
,
filter_image_v
.
data
(),
filter_dims
);
filter_gpu_image_
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
filter_gpu_image_
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_dims
[
0
],
filter_image_dims
[
1
],
filter_image_v
.
data
());
impl_
=
&
ConvImageCompute
::
Conv2d3x3
;
...
...
@@ -151,10 +151,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder
converter
;
const
DDim
&
filter_image_dims
=
converter
.
InitImageDimInfoWith
(
filter_dims
);
std
::
vector
<
uint16
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
converter
.
NCHWToImage
(
filter_cpu
,
filter_image_v
.
data
(),
filter_dims
);
filter_gpu_image_
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
filter_gpu_image_
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_dims
[
0
],
filter_image_dims
[
1
],
filter_image_v
.
data
());
impl_
=
&
ConvImageCompute
::
Conv2d5x5
;
...
...
@@ -165,10 +165,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder
converter
;
const
DDim
&
filter_image_dims
=
converter
.
InitImageDimInfoWith
(
filter_dims
);
std
::
vector
<
uint16
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_dims
[
0
]
*
filter_image_dims
[
1
]
*
4
);
// 4 : RGBA
converter
.
NCHWToImage
(
filter_cpu
,
filter_image_v
.
data
(),
filter_dims
);
this
->
filter_gpu_image_
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
this
->
filter_gpu_image_
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_dims
[
0
],
filter_image_dims
[
1
],
filter_image_v
.
data
());
impl_
=
&
ConvImageCompute
::
Conv2d7x7
;
...
...
@@ -200,12 +200,12 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder
bias_converter
;
const
DDim
&
bias_image_dims
=
bias_converter
.
InitImageDimInfoWith
(
param
.
bias
->
dims
());
std
::
vector
<
uint16
_t
>
bias_image_v
(
bias_image_dims
[
0
]
*
bias_image_dims
[
1
]
*
4
);
std
::
vector
<
half
_t
>
bias_image_v
(
bias_image_dims
[
0
]
*
bias_image_dims
[
1
]
*
4
);
float
*
bias_cpu_data
=
param
.
bias
->
mutable_data
<
float
>
();
bias_converter
.
NCHWToImage
(
bias_cpu_data
,
bias_image_v
.
data
(),
param
.
bias
->
dims
());
this
->
bias_gpu_image_
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
this
->
bias_gpu_image_
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
bias_image_dims
[
0
],
bias_image_dims
[
1
],
bias_image_v
.
data
());
// convert cpu buffer bias --> gpu image --- end ----
}
...
...
@@ -223,8 +223,8 @@ void ConvImageCompute::Conv2d1x1() {
auto
input_dims
=
param
.
x
->
dims
();
auto
paddings
=
*
param
.
paddings
;
auto
strides
=
param
.
strides
;
auto
*
input_image
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
input_image
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
auto
filter_dims
=
param
.
filter
->
dims
();
auto
output_dims
=
param
.
output
->
dims
();
...
...
@@ -233,7 +233,7 @@ void ConvImageCompute::Conv2d1x1() {
int
output_width
=
output_dims
[
3
];
int
output_height
=
output_dims
[
2
];
auto
out_image_shape
=
InitImageDimInfoWith
(
output_dims
);
auto
*
out_image
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_image
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
"width"
],
out_image_shape
[
"height"
]);
const
bool
has_bias
=
param
.
bias
!=
nullptr
;
...
...
@@ -292,7 +292,7 @@ void ConvImageCompute::Conv2d1x1() {
const
cl
::
Buffer
*
bias_buf
=
nullptr
;
const
cl
::
Image2D
*
bias_image
=
nullptr
;
if
(
has_bias
)
{
bias_image
=
bias_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
bias_image
=
bias_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
}
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
...
...
@@ -373,8 +373,8 @@ void ConvImageCompute::Conv2d3x3() {
auto
paddings
=
*
param
.
paddings
;
auto
strides
=
param
.
strides
;
auto
*
input_image
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
input_image
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
auto
filter_dims
=
param
.
filter
->
dims
();
auto
output_dims
=
param
.
output
->
dims
();
...
...
@@ -388,7 +388,7 @@ void ConvImageCompute::Conv2d3x3() {
int
filter_height
=
filter_dims
[
2
];
int
filter_channel
=
filter_dims
[
1
];
auto
out_image_shape
=
InitImageDimInfoWith
(
output_dims
);
auto
*
out_image
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_image
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
"width"
],
out_image_shape
[
"height"
]);
const
bool
has_bias
=
param
.
bias
!=
nullptr
;
...
...
@@ -464,7 +464,7 @@ void ConvImageCompute::Conv2d3x3() {
const
cl
::
Image2D
*
bias_image
=
nullptr
;
if
(
has_bias
)
{
bias_image
=
bias_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
bias_image
=
bias_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
}
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
...
...
@@ -549,8 +549,8 @@ void ConvImageCompute::Conv2d5x5() {
auto
input_dims
=
param
.
x
->
dims
();
auto
paddings
=
*
param
.
paddings
;
auto
strides
=
param
.
strides
;
auto
*
input_image
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
input_image
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
auto
filter_dims
=
param
.
filter
->
dims
();
auto
output_dims
=
param
.
output
->
dims
();
...
...
@@ -561,7 +561,7 @@ void ConvImageCompute::Conv2d5x5() {
int
filter_width
=
filter_dims
[
3
];
int
filter_height
=
filter_dims
[
2
];
auto
out_image_shape
=
InitImageDimInfoWith
(
output_dims
);
auto
*
out_image
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_image
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
"width"
],
out_image_shape
[
"height"
]);
const
bool
has_bias
=
param
.
bias
!=
nullptr
;
...
...
@@ -619,7 +619,7 @@ void ConvImageCompute::Conv2d5x5() {
const
cl
::
Image2D
*
bias_image
=
nullptr
;
if
(
has_bias
)
{
bias_image
=
bias_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
bias_image
=
bias_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
}
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
...
...
@@ -694,8 +694,8 @@ void ConvImageCompute::Conv2d7x7() {
auto
input_dims
=
param
.
x
->
dims
();
auto
paddings
=
*
param
.
paddings
;
auto
strides
=
param
.
strides
;
auto
*
input_image
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
input_image
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
auto
filter_dims
=
param
.
filter
->
dims
();
auto
output_dims
=
param
.
output
->
dims
();
...
...
@@ -706,7 +706,7 @@ void ConvImageCompute::Conv2d7x7() {
int
filter_width
=
filter_dims
[
3
];
int
filter_height
=
filter_dims
[
2
];
auto
out_image_shape
=
InitImageDimInfoWith
(
output_dims
);
auto
*
out_image
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_image
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
"width"
],
out_image_shape
[
"height"
]);
const
bool
has_bias
=
param
.
bias
!=
nullptr
;
...
...
@@ -764,7 +764,7 @@ void ConvImageCompute::Conv2d7x7() {
const
cl
::
Image2D
*
bias_image
=
nullptr
;
if
(
has_bias
)
{
bias_image
=
bias_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
bias_image
=
bias_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
}
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
...
...
@@ -845,17 +845,17 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() {
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
auto
*
input_img
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
filter_img
=
filter_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
input_img
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
filter_img
=
filter_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
const
cl
::
Image2D
*
bias_img
=
nullptr
;
if
(
param
.
bias
)
{
bias_img
=
bias_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
bias_img
=
bias_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
}
auto
image_shape
=
InitImageDimInfoWith
(
output_dims
);
auto
*
output_img
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
output_img
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
STL
::
stringstream
kernel_key
;
...
...
@@ -926,17 +926,17 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
auto
*
input_img
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
filter_img
=
filter_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
input_img
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
filter_img
=
filter_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
const
cl
::
Image2D
*
bias_img
=
nullptr
;
if
(
param
.
bias
)
{
bias_img
=
bias_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
bias_img
=
bias_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
}
auto
image_shape
=
InitImageDimInfoWith
(
output_dims
);
auto
*
output_img
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
output_img
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
STL
::
stringstream
kernel_key
;
...
...
@@ -1009,8 +1009,8 @@ void ConvImageCompute::DepthwiseConv2d() {
auto
input_dims
=
param
.
x
->
dims
();
auto
paddings
=
*
param
.
paddings
;
auto
strides
=
param
.
strides
;
auto
*
input_image
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
input_image
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
filter_image
=
filter_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
auto
filter_dims
=
param
.
filter
->
dims
();
auto
output_dims
=
param
.
output
->
dims
();
...
...
@@ -1021,7 +1021,7 @@ void ConvImageCompute::DepthwiseConv2d() {
int
filter_width
=
filter_dims
[
3
];
int
filter_height
=
filter_dims
[
2
];
auto
out_image_shape
=
InitImageDimInfoWith
(
output_dims
);
auto
*
out_image
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_image
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
"width"
],
out_image_shape
[
"height"
]);
const
bool
has_bias
=
param
.
bias
!=
nullptr
;
...
...
@@ -1080,7 +1080,7 @@ void ConvImageCompute::DepthwiseConv2d() {
const
cl
::
Buffer
*
bias_buf
=
nullptr
;
const
cl
::
Image2D
*
bias_image
=
nullptr
;
if
(
has_bias
)
{
bias_image
=
bias_gpu_image_
.
data
<
uint16
_t
,
cl
::
Image2D
>
();
bias_image
=
bias_gpu_image_
.
data
<
half
_t
,
cl
::
Image2D
>
();
}
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
...
...
lite/kernels/opencl/conv_image_compute.h
浏览文件 @
b92e9f9a
...
...
@@ -18,6 +18,7 @@
#include <string>
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/tensor.h"
...
...
lite/kernels/opencl/conv_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -284,13 +284,13 @@ TEST(conv2d, compute_image2d_1x1) {
paddle
::
lite
::
CLImageConverterDefault
default_convertor
;
SHADOW_LOG
<<
"set mapped input ..."
;
std
::
vector
<
uint16
_t
>
x_image_v
(
std
::
vector
<
half
_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
filter_image_v
(
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_width
*
filter_image_height
*
4
);
// 4 :RGBA
std
::
vector
<
uint16
_t
>
bias_image_v
(
std
::
vector
<
half
_t
>
bias_image_v
(
bias_image_width
*
bias_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
out_image_v
(
std
::
vector
<
half
_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 : RGBA
default_convertor
.
NCHWToImage
(
...
...
@@ -301,13 +301,13 @@ TEST(conv2d, compute_image2d_1x1) {
nw_convertor
.
NCHWToImage
(
filter_v
.
data
(),
filter_image_v
.
data
(),
filter_dim
);
auto
*
input_image2d
=
input
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
input_image2d
=
input
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
input_image_width
,
input_image_height
,
x_image_v
.
data
());
// assign filter as target arm
filter
.
Assign
<
float
,
lite
::
DDim
,
TARGET
(
kARM
)
>
(
filter_v
.
data
(),
filter_dim
);
// auto* filter_image2d =
// filter.mutable_data<
uint16
_t, cl::Image2D>(
// filter.mutable_data<
half
_t, cl::Image2D>(
// filter_image_width,
// filter_image_height,
// filter_image_v.data());
...
...
@@ -357,12 +357,11 @@ TEST(conv2d, compute_image2d_1x1) {
SHADOW_LOG
<<
"kernel launch ..."
;
kernel
->
Launch
();
SHADOW_LOG
<<
"mutable output ..."
;
auto
*
output_image2d
=
output
.
mutable_data
<
uint16_t
,
cl
::
Image2D
>
(
out_image_width
,
out_image_height
);
auto
*
output_image2d
=
output
.
mutable_data
<
half_t
,
cl
::
Image2D
>
(
out_image_width
,
out_image_height
);
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
...
...
@@ -375,14 +374,13 @@ TEST(conv2d, compute_image2d_1x1) {
"cl tensor."
;
}
TargetWrapperCL
::
ImgcpySync
(
out_image_v
.
data
(),
output
.
data
<
uint16_t
,
cl
::
Image2D
>
(),
out_image_width
,
out_image_height
,
cl_image2d_row_pitch
,
cl_image2d_slice_pitch
,
IoDirection
::
DtoH
);
TargetWrapperCL
::
ImgcpySync
(
out_image_v
.
data
(),
output
.
data
<
half_t
,
cl
::
Image2D
>
(),
out_image_width
,
out_image_height
,
cl_image2d_row_pitch
,
cl_image2d_slice_pitch
,
IoDirection
::
DtoH
);
DDim
out_image_shape
=
default_convertor
.
InitImageDimInfoWith
(
output
.
dims
());
...
...
@@ -641,14 +639,14 @@ TEST(conv2d, compute_image2d_3x3) {
paddle
::
lite
::
CLImageConverterDefault
default_convertor
;
SHADOW_LOG
<<
"set mapped input ..."
;
std
::
vector
<
uint16_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 :RGBA
std
::
vector
<
uint16
_t
>
filter_image_v
(
std
::
vector
<
half_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 :RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_width
*
filter_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
bias_image_v
(
std
::
vector
<
half
_t
>
bias_image_v
(
bias_image_width
*
bias_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 :RGBA
std
::
vector
<
half_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 :RGBA
default_convertor
.
NCHWToImage
(
input_v
.
data
(),
x_image_v
.
data
(),
input_dim
);
...
...
@@ -673,7 +671,7 @@ TEST(conv2d, compute_image2d_3x3) {
for
(
int
i
=
0
;
i
<
filter_image_v
.
size
();
i
++
)
{
SHADOW_LOG
<<
"("
<<
i
<<
")"
<<
filter_image_v
[
i
];
}
auto
*
input_image2d
=
input
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
input_image2d
=
input
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
input_image_width
,
input_image_height
,
x_image_v
.
data
());
// assign filter as target arm
filter
.
Assign
<
float
,
lite
::
DDim
,
TARGET
(
kARM
)
>
(
filter_v
.
data
(),
...
...
@@ -714,11 +712,11 @@ TEST(conv2d, compute_image2d_3x3) {
SHADOW_LOG
<<
"kernel launch ..."
;
kernel
->
Launch
();
SHADOW_LOG
<<
"mutable output ..."
;
auto
*
output_image2d
=
output
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
output_image2d
=
output
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_width
,
out_image_height
);
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
...
...
@@ -732,7 +730,7 @@ TEST(conv2d, compute_image2d_3x3) {
}
TargetWrapperCL
::
ImgcpySync
(
out_image_v
.
data
(),
output
.
data
<
uint16
_t
,
cl
::
Image2D
>
(),
output
.
data
<
half
_t
,
cl
::
Image2D
>
(),
out_image_width
,
out_image_height
,
cl_image2d_row_pitch
,
...
...
@@ -987,14 +985,14 @@ TEST(conv2d, compute_image2d_5x5) {
paddle
::
lite
::
CLImageConverterDefault
default_convertor
;
SHADOW_LOG
<<
"set mapped input ..."
;
std
::
vector
<
uint16_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 :RGBA
std
::
vector
<
uint16
_t
>
filter_image_v
(
std
::
vector
<
half_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 :RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_width
*
filter_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
bias_image_v
(
std
::
vector
<
half
_t
>
bias_image_v
(
bias_image_width
*
bias_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 :RGBA
std
::
vector
<
half_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 :RGBA
default_convertor
.
NCHWToImage
(
input_v
.
data
(),
x_image_v
.
data
(),
input_dim
);
...
...
@@ -1019,7 +1017,7 @@ TEST(conv2d, compute_image2d_5x5) {
for
(
int
i
=
0
;
i
<
filter_image_v
.
size
();
i
++
)
{
SHADOW_LOG
<<
"("
<<
i
<<
")"
<<
filter_image_v
[
i
];
}
auto
*
input_image2d
=
input
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
input_image2d
=
input
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
input_image_width
,
input_image_height
,
x_image_v
.
data
());
// assign filter as target arm
filter
.
Assign
<
float
,
lite
::
DDim
,
TARGET
(
kARM
)
>
(
filter_v
.
data
(),
...
...
@@ -1060,11 +1058,11 @@ TEST(conv2d, compute_image2d_5x5) {
SHADOW_LOG
<<
"kernel launch ..."
;
kernel
->
Launch
();
SHADOW_LOG
<<
"mutable output ..."
;
auto
*
output_image2d
=
output
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
output_image2d
=
output
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_width
,
out_image_height
);
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
...
...
@@ -1078,7 +1076,7 @@ TEST(conv2d, compute_image2d_5x5) {
}
TargetWrapperCL
::
ImgcpySync
(
out_image_v
.
data
(),
output
.
data
<
uint16
_t
,
cl
::
Image2D
>
(),
output
.
data
<
half
_t
,
cl
::
Image2D
>
(),
out_image_width
,
out_image_height
,
cl_image2d_row_pitch
,
...
...
@@ -1325,13 +1323,13 @@ TEST(conv2d, compute_image2d_7x7) {
paddle
::
lite
::
CLImageConverterDefault
default_convertor
;
SHADOW_LOG
<<
"set mapped input ..."
;
std
::
vector
<
uint16
_t
>
x_image_v
(
std
::
vector
<
half
_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
filter_image_v
(
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_width
*
filter_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
bias_image_v
(
std
::
vector
<
half
_t
>
bias_image_v
(
bias_image_width
*
bias_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
out_image_v
(
std
::
vector
<
half
_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 : RGBA
default_convertor
.
NCHWToImage
(
...
...
@@ -1357,7 +1355,7 @@ TEST(conv2d, compute_image2d_7x7) {
for
(
int
i
=
0
;
i
<
filter_image_v
.
size
();
i
++
)
{
SHADOW_LOG
<<
"("
<<
i
<<
")"
<<
filter_image_v
[
i
];
}
auto
*
input_image2d
=
input
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
input_image2d
=
input
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
input_image_width
,
input_image_height
,
x_image_v
.
data
());
// assign filter as target arm
...
...
@@ -1399,11 +1397,11 @@ TEST(conv2d, compute_image2d_7x7) {
SHADOW_LOG
<<
"kernel launch ..."
;
kernel
->
Launch
();
SHADOW_LOG
<<
"mutable output ..."
;
auto
*
output_image2d
=
output
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
output_image2d
=
output
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_width
,
out_image_height
);
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
...
...
@@ -1417,7 +1415,7 @@ TEST(conv2d, compute_image2d_7x7) {
}
TargetWrapperCL
::
ImgcpySync
(
out_image_v
.
data
(),
output
.
data
<
uint16
_t
,
cl
::
Image2D
>
(),
output
.
data
<
half
_t
,
cl
::
Image2D
>
(),
out_image_width
,
out_image_height
,
cl_image2d_row_pitch
,
...
...
lite/kernels/opencl/depthwise_conv2d_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -253,14 +253,14 @@ TEST(depthwise_conv2d_basic, compute) {
paddle
::
lite
::
CLImageConverterDefault
default_convertor
;
VLOG
(
4
)
<<
"set mapped input ..."
;
std
::
vector
<
uint16_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
filter_image_v
(
std
::
vector
<
half_t
>
x_image_v
(
input_image_width
*
input_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_v
(
filter_image_width
*
filter_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16_t
>
bias_image_v
(
bias_image_width
*
bias_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
uint16
_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
half_t
>
bias_image_v
(
bias_image_width
*
bias_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
out_image_v
(
out_image_width
*
out_image_height
*
4
);
// 4 : RGBA
default_convertor
.
NCHWToImage
(
input_v
.
data
(),
x_image_v
.
data
(),
input_dim
);
...
...
@@ -270,9 +270,9 @@ TEST(depthwise_conv2d_basic, compute) {
nw_convertor
.
NCHWToImage
(
filter_v
.
data
(),
filter_image_v
.
data
(),
filter_dim
);
auto
*
input_image2d
=
input
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
input_image2d
=
input
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
input_image_width
,
input_image_height
,
x_image_v
.
data
());
auto
*
filter_image2d
=
filter
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
filter_image2d
=
filter
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_width
,
filter_image_height
,
filter_image_v
.
data
());
if
(
bias_flag
)
{
...
...
@@ -285,7 +285,7 @@ TEST(depthwise_conv2d_basic, compute) {
CLImageConverterFolder
folder_convertor
;
folder_convertor
.
NCHWToImage
(
bias_v
.
data
(),
bias_image_v
.
data
(),
bias_dim
);
auto
*
bias_data
=
bias
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
bias_data
=
bias
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
bias_image_width
,
bias_image_height
,
bias_image_v
.
data
());
}
...
...
@@ -301,11 +301,11 @@ TEST(depthwise_conv2d_basic, compute) {
VLOG
(
4
)
<<
"kernel launch ..."
;
kernel
->
Launch
();
VLOG
(
4
)
<<
"mutable output ..."
;
auto
*
output_image2d
=
output
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
output_image2d
=
output
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_width
,
out_image_height
);
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
...
...
@@ -319,7 +319,7 @@ TEST(depthwise_conv2d_basic, compute) {
}
TargetWrapperCL
::
ImgcpySync
(
out_image_v
.
data
(),
output
.
data
<
uint16
_t
,
cl
::
Image2D
>
(),
output
.
data
<
half
_t
,
cl
::
Image2D
>
(),
out_image_width
,
out_image_height
,
cl_image2d_row_pitch
,
...
...
@@ -434,11 +434,11 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
default_converter
->
InitImageDimInfoWith
(
input
.
dims
());
LOG
(
INFO
)
<<
"input_image_shape = "
<<
input_image_shape
[
0
]
<<
" "
<<
input_image_shape
[
1
];
std
::
vector
<
uint16
_t
>
input_image_data
(
input_image_shape
.
production
()
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
input_image_data
(
input_image_shape
.
production
()
*
4
);
// 4 : RGBA
default_converter
->
NCHWToImage
(
input_v
.
data
(),
input_image_data
.
data
(),
input
.
dims
());
auto
*
input_image
=
input
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
input_image
=
input
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
input_image_shape
[
0
],
input_image_shape
[
1
],
input_image_data
.
data
());
LOG
(
INFO
)
<<
"prepare kernel"
;
...
...
@@ -447,11 +447,11 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
DDim
filter_image_shape
=
nw_converter
->
InitImageDimInfoWith
(
filter
.
dims
());
LOG
(
INFO
)
<<
"filter_image_shape = "
<<
filter_image_shape
[
0
]
<<
" "
<<
filter_image_shape
[
1
];
std
::
vector
<
uint16
_t
>
filter_image_data
(
filter_image_shape
.
production
()
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
filter_image_data
(
filter_image_shape
.
production
()
*
4
);
// 4 : RGBA
nw_converter
->
NCHWToImage
(
filter_v
.
data
(),
filter_image_data
.
data
(),
filter
.
dims
());
auto
*
filter_image
=
filter
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
filter_image
=
filter
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
filter_image_shape
[
0
],
filter_image_shape
[
1
],
filter_image_data
.
data
());
LOG
(
INFO
)
<<
"launch"
;
...
...
@@ -460,13 +460,13 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
default_converter
->
InitImageDimInfoWith
(
output
.
dims
());
LOG
(
INFO
)
<<
"output_image_shape = "
<<
output_image_shape
[
0
]
<<
" "
<<
output_image_shape
[
1
];
auto
*
output_image
=
output
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
output_image
=
output
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
output_image_shape
[
0
],
output_image_shape
[
1
]);
kernel
->
Launch
();
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
VLOG
(
4
)
<<
"--- Find the sync event for the target cl tensor. ---"
;
...
...
@@ -491,8 +491,7 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
const
size_t
cl_image2d_row_pitch
{
0
};
const
size_t
cl_image2d_slice_pitch
{
0
};
uint16_t
*
output_image_data
=
new
uint16_t
[
output_image_shape
.
production
()
*
4
];
half_t
*
output_image_data
=
new
half_t
[
output_image_shape
.
production
()
*
4
];
TargetWrapperCL
::
ImgcpySync
(
output_image_data
,
output_image
,
output_image_shape
[
0
],
...
...
lite/kernels/opencl/elementwise_add_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -78,10 +78,10 @@ void ElementwiseAddImageCompute::Run() {
default_convertor
.
InitImageDimInfoWith
(
out
->
dims
());
// w, h
auto
y_img_shape
=
default_convertor
.
InitImageDimInfoWith
(
y
->
dims
());
auto
*
x_img
=
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
y_img
=
y
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_img
=
out
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
out_img_shape
[
0
],
out_img_shape
[
1
]);
auto
*
x_img
=
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
y_img
=
y
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
out_img
=
out
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_img_shape
[
0
],
out_img_shape
[
1
]);
VLOG
(
4
)
<<
"x_img_shape[w,h]:"
<<
x_img_width
<<
" "
<<
x_img_height
;
VLOG
(
4
)
<<
"y_img_shape[w,h]:"
<<
y_img_shape
[
0
]
<<
" "
<<
y_img_shape
[
1
];
...
...
lite/kernels/opencl/elementwise_add_image_compute.h
浏览文件 @
b92e9f9a
...
...
@@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h"
#include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h"
...
...
lite/kernels/opencl/elementwise_add_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -158,9 +158,9 @@ TEST(elementwise_add_image, compute) {
auto
x_img_shape
=
default_convertor
.
InitImageDimInfoWith
(
x_dim
);
// w, h
auto
x_img_w
=
x_img_shape
[
0
];
auto
x_img_h
=
x_img_shape
[
1
];
std
::
vector
<
uint16
_t
>
x_img_v
(
x_img_w
*
x_img_h
*
4
);
// 4: RGBA
std
::
vector
<
half
_t
>
x_img_v
(
x_img_w
*
x_img_h
*
4
);
// 4: RGBA
default_convertor
.
NCHWToImage
(
x_v
.
data
(),
x_img_v
.
data
(),
x_dim
);
eleadd_x
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
eleadd_x
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
x_img_w
,
x_img_h
,
x_img_v
.
data
());
// y
...
...
@@ -169,10 +169,10 @@ TEST(elementwise_add_image, compute) {
auto
y_img_shape
=
default_convertor
.
InitImageDimInfoWith
(
y_dim
);
// w, h
auto
y_img_w
=
y_img_shape
[
0
];
auto
y_img_h
=
y_img_shape
[
1
];
std
::
vector
<
uint16
_t
>
y_img_v
(
y_img_shape
[
0
]
*
y_img_shape
[
1
]
*
4
);
// 4: RGBA
std
::
vector
<
half
_t
>
y_img_v
(
y_img_shape
[
0
]
*
y_img_shape
[
1
]
*
4
);
// 4: RGBA
default_convertor
.
NCHWToImage
(
y_v
.
data
(),
y_img_v
.
data
(),
y_dim
);
eleadd_y
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
eleadd_y
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
y_img_w
,
y_img_h
,
y_img_v
.
data
());
// out
...
...
@@ -180,10 +180,10 @@ TEST(elementwise_add_image, compute) {
default_convertor
.
InitImageDimInfoWith
(
out_dim
);
// w, h
auto
out_img_w
=
out_img_shape
[
0
];
auto
out_img_h
=
out_img_shape
[
1
];
eleadd_out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
out_img_w
,
out_img_h
);
eleadd_out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_img_w
,
out_img_h
);
std
::
vector
<
uint16
_t
>
out_img_v
(
out_img_w
*
out_img_h
*
4
);
fill_data
<
uint16
_t
>
(
std
::
vector
<
half
_t
>
out_img_v
(
out_img_w
*
out_img_h
*
4
);
fill_data
<
half
_t
>
(
out_img_v
.
data
(),
out_img_v
.
size
(),
0
);
// fill with zero value
std
::
vector
<
float
>
out_v
(
out_dim
.
production
());
...
...
@@ -235,7 +235,7 @@ TEST(elementwise_add_image, compute) {
const
size_t
cl_image2d_row_pitch
{
0
};
const
size_t
cl_image2d_slice_pitch
{
0
};
TargetWrapperCL
::
ImgcpySync
(
out_img_v
.
data
(),
eleadd_out
.
data
<
uint16
_t
,
cl
::
Image2D
>
(),
eleadd_out
.
data
<
half
_t
,
cl
::
Image2D
>
(),
out_img_w
,
out_img_h
,
cl_image2d_row_pitch
,
...
...
lite/kernels/opencl/elementwise_mul_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -14,6 +14,7 @@
#include <memory>
#include <string>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_image_converter.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
...
...
@@ -95,10 +96,10 @@ class ElementwiseMulImageCompute
default_convertor
.
InitImageDimInfoWith
(
out
->
dims
());
// w, h
auto
y_img_shape
=
default_convertor
.
InitImageDimInfoWith
(
y
->
dims
());
auto
*
x_img
=
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
y_img
=
y
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_img
=
out
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
out_img_shape
[
0
],
out_img_shape
[
1
]);
auto
*
x_img
=
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
y_img
=
y
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
*
out_img
=
out
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_img_shape
[
0
],
out_img_shape
[
1
]);
VLOG
(
4
)
<<
"x_img_shape[w,h]:"
<<
x_img_width
<<
" "
<<
x_img_height
;
VLOG
(
4
)
<<
"y_img_shape[w,h]:"
<<
y_img_shape
[
0
]
<<
" "
<<
y_img_shape
[
1
];
...
...
lite/kernels/opencl/elementwise_mul_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -151,9 +151,9 @@ TEST(elementwise_mul_image, compute) {
auto
x_img_shape
=
default_convertor
.
InitImageDimInfoWith
(
x_dim
);
// w, h
auto
x_img_w
=
x_img_shape
[
0
];
auto
x_img_h
=
x_img_shape
[
1
];
std
::
vector
<
uint16
_t
>
x_img_v
(
x_img_w
*
x_img_h
*
4
);
// 4: RGBA
std
::
vector
<
half
_t
>
x_img_v
(
x_img_w
*
x_img_h
*
4
);
// 4: RGBA
default_convertor
.
NCHWToImage
(
x_v
.
data
(),
x_img_v
.
data
(),
x_dim
);
elemul_x
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
elemul_x
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
x_img_w
,
x_img_h
,
x_img_v
.
data
());
// y
...
...
@@ -162,10 +162,10 @@ TEST(elementwise_mul_image, compute) {
auto
y_img_shape
=
default_convertor
.
InitImageDimInfoWith
(
y_dim
);
// w, h
auto
y_img_w
=
y_img_shape
[
0
];
auto
y_img_h
=
y_img_shape
[
1
];
std
::
vector
<
uint16
_t
>
y_img_v
(
y_img_shape
[
0
]
*
y_img_shape
[
1
]
*
4
);
// 4: RGBA
std
::
vector
<
half
_t
>
y_img_v
(
y_img_shape
[
0
]
*
y_img_shape
[
1
]
*
4
);
// 4: RGBA
default_convertor
.
NCHWToImage
(
y_v
.
data
(),
y_img_v
.
data
(),
y_dim
);
elemul_y
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
elemul_y
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
y_img_w
,
y_img_h
,
y_img_v
.
data
());
// out
...
...
@@ -173,10 +173,10 @@ TEST(elementwise_mul_image, compute) {
default_convertor
.
InitImageDimInfoWith
(
out_dim
);
// w, h
auto
out_img_w
=
out_img_shape
[
0
];
auto
out_img_h
=
out_img_shape
[
1
];
elemul_out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
out_img_w
,
out_img_h
);
elemul_out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_img_w
,
out_img_h
);
std
::
vector
<
uint16
_t
>
out_img_v
(
out_img_w
*
out_img_h
*
4
);
fill_data
<
uint16
_t
>
(
std
::
vector
<
half
_t
>
out_img_v
(
out_img_w
*
out_img_h
*
4
);
fill_data
<
half
_t
>
(
out_img_v
.
data
(),
out_img_v
.
size
(),
0
);
// fill with zero value
std
::
vector
<
float
>
out_v
(
out_dim
.
production
());
...
...
@@ -218,7 +218,7 @@ TEST(elementwise_mul_image, compute) {
const
size_t
cl_image2d_row_pitch
{
0
};
const
size_t
cl_image2d_slice_pitch
{
0
};
TargetWrapperCL
::
ImgcpySync
(
out_img_v
.
data
(),
elemul_out
.
data
<
uint16
_t
,
cl
::
Image2D
>
(),
elemul_out
.
data
<
half
_t
,
cl
::
Image2D
>
(),
out_img_w
,
out_img_h
,
cl_image2d_row_pitch
,
...
...
lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/opencl/elementwise_add_image_compute.h"
...
...
lite/kernels/opencl/layout_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include "lite/api/paddle_place.h"
#include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/target_wrapper.h"
...
...
@@ -47,7 +48,7 @@ class LayoutComputeBufferChwToImageDefault
auto
*
x_data
=
param
.
x
->
data
<
float
,
cl
::
Buffer
>
();
auto
x_dims
=
param
.
x
->
dims
();
auto
image_shape
=
InitImageDimInfoWith
(
x_dims
);
auto
*
y_data
=
param
.
y
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
y_data
=
param
.
y
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
auto
y_dims
=
param
.
y
->
dims
();
...
...
@@ -146,7 +147,7 @@ class LayoutComputeImageDefaultToBufferChw
void
Run
()
override
{
auto
&
param
=
Param
<
param_t
>
();
auto
*
x_data
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_data
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
x_dims
=
param
.
x
->
dims
();
auto
*
y_data
=
param
.
y
->
mutable_data
<
float
,
cl
::
Buffer
>
(
TARGET
(
kOpenCL
));
auto
y_dims
=
param
.
y
->
dims
();
...
...
lite/kernels/opencl/layout_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -79,7 +79,7 @@ TEST(layout_ImageDefault, compute) {
auto
*
y_data
=
y
.
mutable_data
<
float
,
cl
::
Buffer
>
(
TARGET
(
kOpenCL
));
auto
image_shape
=
paddle
::
lite
::
kernels
::
opencl
::
InitImageDimInfoWith
(
x_dim
);
auto
*
y_image_data
=
y_image
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
y_image_data
=
y_image
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
auto
*
mapped_x
=
static_cast
<
float
*>
(
TargetWrapperCL
::
Map
(
x_data
,
0
,
sizeof
(
float
)
*
x_dim
.
production
()));
...
...
lite/kernels/opencl/nearest_interp_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
...
...
@@ -45,14 +46,13 @@ class NearestInterpComputeImageDefault
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
const
auto
&
x_dims
=
param
.
X
->
dims
();
auto
*
x_buf
=
param
.
X
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
// use
uint16
_t represents half float
param
.
X
->
data
<
half
_t
,
cl
::
Image2D
>
();
// use
half
_t represents half float
auto
image_shape
=
InitImageDimInfoWith
(
x_dims
);
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
uint16_t
,
cl
::
Image2D
>
(
// use uint16_t
// represents half float
image_shape
[
"width"
],
image_shape
[
"height"
]);
auto
*
out_buf
=
param
.
Out
->
mutable_data
<
half_t
,
cl
::
Image2D
>
(
// use half_t
// represents half float
image_shape
[
"width"
],
image_shape
[
"height"
]);
const
auto
&
y_dims
=
param
.
Out
->
dims
();
// useless: check dim only
float
scale_h
=
y_dims
[
2
]
/
x_dims
[
2
];
float
scale_w
=
y_dims
[
3
]
/
x_dims
[
3
];
...
...
lite/kernels/opencl/nearest_interp_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -166,11 +166,11 @@ TEST(nearest_interp_image2d, compute) {
mapped_y
[
i
]
=
static_cast
<
int
>
(
0
);
}
auto
*
nearest_interp_in_data
=
nearest_interp_in
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
nearest_interp_in
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
nearest_interp_image2d_shape
[
"width"
],
nearest_interp_image2d_shape
[
"height"
]);
auto
*
nearest_interp_out_data
=
nearest_interp_out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
nearest_interp_out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
y_dim
[
3
],
y_dim
[
2
]);
// set context and kernel args
...
...
lite/kernels/opencl/pool_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
...
...
@@ -67,13 +68,13 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
auto
*
x_img
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_img
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
LOG
(
INFO
)
<<
"x_image"
<<
x_img
;
auto
out_image_shape
=
InitImageDimInfoWith
(
out_dims
);
LOG
(
INFO
)
<<
"out_image_shape = "
<<
out_image_shape
[
"width"
]
<<
" "
<<
out_image_shape
[
"height"
];
auto
*
out_img
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_img
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
"width"
],
out_image_shape
[
"height"
]);
LOG
(
INFO
)
<<
"out_image"
<<
out_img
;
...
...
lite/kernels/opencl/pool_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -123,23 +123,22 @@ TEST(pool2d_image2d, compute) {
DDim
x_image_shape
=
default_converter
->
InitImageDimInfoWith
(
in_dim
);
LOG
(
INFO
)
<<
"x_image_shape = "
<<
x_image_shape
[
0
]
<<
" "
<<
x_image_shape
[
1
];
std
::
vector
<
uint16_t
>
x_image_data
(
x_image_shape
.
production
()
*
4
);
// 4 : RGBA
std
::
vector
<
half_t
>
x_image_data
(
x_image_shape
.
production
()
*
4
);
// 4 : RGBA
default_converter
->
NCHWToImage
(
input_v
.
data
(),
x_image_data
.
data
(),
in_dim
);
auto
*
x_image
=
x
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
x_image
=
x
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
x_image_shape
[
0
],
x_image_shape
[
1
],
x_image_data
.
data
());
LOG
(
INFO
)
<<
"x_image:"
<<
x_image
;
DDim
out_image_shape
=
default_converter
->
InitImageDimInfoWith
(
out_dim
);
LOG
(
INFO
)
<<
"out_image_shape = "
<<
out_image_shape
[
0
]
<<
" "
<<
out_image_shape
[
1
];
auto
*
out_image
=
out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
out_image_shape
[
0
],
out_image_shape
[
1
]);
auto
*
out_image
=
out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
0
],
out_image_shape
[
1
]);
LOG
(
INFO
)
<<
"out_image:"
<<
out_image
;
kernel
->
Launch
();
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
VLOG
(
4
)
<<
"--- Find the sync event for the target cl tensor. ---"
;
...
...
@@ -154,7 +153,7 @@ TEST(pool2d_image2d, compute) {
const
size_t
cl_image2d_row_pitch
{
0
};
const
size_t
cl_image2d_slice_pitch
{
0
};
uint16_t
*
out_image_data
=
new
uint16
_t
[
out_image_shape
.
production
()
*
4
];
half_t
*
out_image_data
=
new
half
_t
[
out_image_shape
.
production
()
*
4
];
TargetWrapperCL
::
ImgcpySync
(
out_image_data
,
out_image
,
out_image_shape
[
0
],
...
...
lite/kernels/opencl/reshape_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
...
...
@@ -51,7 +52,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
const
int64_t
&
input_image_width
=
input_image_shape
.
at
(
"width"
);
const
int64_t
&
input_image_height
=
input_image_shape
.
at
(
"height"
);
const
cl
::
Image2D
*
const
x_image
=
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
const
cl
::
Image2D
*
const
x_image
=
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
const
std
::
vector
<
int
>&
shape_vct
=
param
.
shape_vct
;
Tensor
*
const
output
=
param
.
output
;
...
...
@@ -60,7 +61,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
const
std
::
map
<
std
::
string
,
size_t
>&
out_image_shape
=
InitImageDimInfoWith
(
out_dims
);
cl
::
Image2D
*
const
out_image
=
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
cl
::
Image2D
*
const
out_image
=
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
.
at
(
"width"
),
out_image_shape
.
at
(
"height"
));
LOG
(
INFO
)
<<
"out_dims= "
<<
out_dims
;
...
...
lite/kernels/opencl/reshape_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -152,13 +152,13 @@ TEST(reshape_opencl, compute) {
}
paddle
::
lite
::
CLImageConverterDefault
default_convertor
;
std
::
vector
<
uint16
_t
>
x_image_data
(
input_image_width
*
input_image_height
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
x_image_data
(
input_image_width
*
input_image_height
*
4
);
// 4 : RGBA
LOG
(
INFO
)
<<
"set mapped input ..."
;
default_convertor
.
NCHWToImage
(
input_v_data
,
x_image_data
.
data
(),
input_dim
);
auto
*
input_image
=
input
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
input_image
=
input
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
input_image_width
,
input_image_height
,
x_image_data
.
data
());
LOG
(
INFO
)
<<
"prepare kernel ready"
;
...
...
@@ -168,7 +168,7 @@ TEST(reshape_opencl, compute) {
DDim
out_image_shape
=
default_converter
.
InitImageDimInfoWith
(
output_dim
);
LOG
(
INFO
)
<<
"out_image_shape = "
<<
out_image_shape
[
0
]
<<
" "
<<
out_image_shape
[
1
];
auto
*
out_image
=
output
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_image
=
output
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
0
],
out_image_shape
[
1
]);
VLOG
(
4
)
<<
"out_dims= "
<<
output_dim
;
...
...
@@ -185,7 +185,7 @@ TEST(reshape_opencl, compute) {
kernel
->
Launch
();
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_image
);
if
(
it
!=
wait_list
->
end
())
{
...
...
@@ -196,9 +196,9 @@ TEST(reshape_opencl, compute) {
LOG
(
FATAL
)
<<
"Could not find the sync event for the target cl tensor."
;
}
uint16_t
*
out_image_data
=
new
uint16
_t
[
out_image_shape
.
production
()
*
4
];
half_t
*
out_image_data
=
new
half
_t
[
out_image_shape
.
production
()
*
4
];
TargetWrapperCL
::
ImgcpySync
(
out_image_data
,
output
.
data
<
uint16
_t
,
cl
::
Image2D
>
(),
output
.
data
<
half
_t
,
cl
::
Image2D
>
(),
out_image_shape
[
0
],
out_image_shape
[
1
],
cl_image2d_row_pitch
,
...
...
lite/kernels/opencl/scale_image_compute.cc
浏览文件 @
b92e9f9a
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
...
...
@@ -43,7 +44,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
void
Run
()
override
{
const
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
const
auto
&
in_dims
=
param
.
x
->
dims
();
auto
*
x_img
=
param
.
x
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
x_img
=
param
.
x
->
data
<
half
_t
,
cl
::
Image2D
>
();
const
float
scale
=
param
.
scale
;
const
float
bias
=
param
.
bias
;
...
...
@@ -51,7 +52,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto
out_image_shape
=
InitImageDimInfoWith
(
in_dims
);
LOG
(
INFO
)
<<
"out_image_shape = "
<<
out_image_shape
[
"width"
]
<<
" "
<<
out_image_shape
[
"height"
];
auto
*
out_img
=
param
.
output
->
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
out_img
=
param
.
output
->
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
out_image_shape
[
"width"
],
out_image_shape
[
"height"
]);
LOG
(
INFO
)
<<
"out_image"
<<
out_img
;
...
...
lite/kernels/opencl/scale_image_compute_test.cc
浏览文件 @
b92e9f9a
...
...
@@ -77,19 +77,19 @@ TEST(scale_image2d_fp32, compute) {
CLImageConverterDefault
*
default_converter
=
new
CLImageConverterDefault
();
DDim
image_shape
=
default_converter
->
InitImageDimInfoWith
(
in_dim
);
LOG
(
INFO
)
<<
"image_shape = "
<<
image_shape
[
0
]
<<
" "
<<
image_shape
[
1
];
std
::
vector
<
uint16
_t
>
x_image_data
(
image_shape
.
production
()
*
4
);
// 4 : RGBA
std
::
vector
<
half
_t
>
x_image_data
(
image_shape
.
production
()
*
4
);
// 4 : RGBA
default_converter
->
NCHWToImage
(
input_v
.
data
(),
x_image_data
.
data
(),
in_dim
);
auto
*
x_image
=
x
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
auto
*
x_image
=
x
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
0
],
image_shape
[
1
],
x_image_data
.
data
());
LOG
(
INFO
)
<<
"x_image:"
<<
x_image
;
auto
*
out_image
=
out
.
mutable_data
<
uint16
_t
,
cl
::
Image2D
>
(
image_shape
[
0
],
image_shape
[
1
]);
out
.
mutable_data
<
half
_t
,
cl
::
Image2D
>
(
image_shape
[
0
],
image_shape
[
1
]);
LOG
(
INFO
)
<<
"out_image:"
<<
out_image
;
kernel
->
Launch
();
auto
*
wait_list
=
context
->
As
<
OpenCLContext
>
().
cl_wait_list
();
auto
*
out_ptr
=
param
.
output
->
data
<
uint16
_t
,
cl
::
Image2D
>
();
auto
*
out_ptr
=
param
.
output
->
data
<
half
_t
,
cl
::
Image2D
>
();
auto
it
=
wait_list
->
find
(
out_ptr
);
if
(
it
!=
wait_list
->
end
())
{
VLOG
(
4
)
<<
"--- Find the sync event for the target cl tensor. ---"
;
...
...
@@ -104,7 +104,7 @@ TEST(scale_image2d_fp32, compute) {
const
size_t
cl_image2d_row_pitch
{
0
};
const
size_t
cl_image2d_slice_pitch
{
0
};
uint16_t
*
out_image_data
=
new
uint16
_t
[
image_shape
.
production
()
*
4
];
half_t
*
out_image_data
=
new
half
_t
[
image_shape
.
production
()
*
4
];
TargetWrapperCL
::
ImgcpySync
(
out_image_data
,
out_image
,
image_shape
[
0
],
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录