Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
fc3a66ae
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fc3a66ae
编写于
6月 25, 2019
作者:
C
Chunwei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'fix_opencl_acc' into 'incubate/lite'
fix elementwise_add acc bugs. See merge request inference/paddlelite!58
上级
679aabac
19bea13c
变更
17
展开全部
显示空白变更内容
内联
并排
Showing
17 changed file
with
210 addition
and
633 deletion
+210
-633
paddle/fluid/lite/api/mobilenetv1_test.cc
paddle/fluid/lite/api/mobilenetv1_test.cc
+1
-1
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+3
-0
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc
+2
-2
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
...fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
+4
-4
paddle/fluid/lite/opencl/CMakeLists.txt
paddle/fluid/lite/opencl/CMakeLists.txt
+2
-4
paddle/fluid/lite/opencl/cl_caller.cc
paddle/fluid/lite/opencl/cl_caller.cc
+24
-14
paddle/fluid/lite/opencl/cl_caller.h
paddle/fluid/lite/opencl/cl_caller.h
+2
-2
paddle/fluid/lite/opencl/cl_engine.cc
paddle/fluid/lite/opencl/cl_engine.cc
+1
-2
paddle/fluid/lite/opencl/cl_half.cc
paddle/fluid/lite/opencl/cl_half.cc
+0
-518
paddle/fluid/lite/opencl/cl_image.cc
paddle/fluid/lite/opencl/cl_image.cc
+4
-5
paddle/fluid/lite/opencl/cl_image_converter.cc
paddle/fluid/lite/opencl/cl_image_converter.cc
+20
-21
paddle/fluid/lite/opencl/cl_image_converter.h
paddle/fluid/lite/opencl/cl_image_converter.h
+14
-15
paddle/fluid/lite/opencl/cl_kernel/channel_add_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/channel_add_kernel.cl
+29
-0
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
+7
-9
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
+5
-6
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
+7
-8
paddle/fluid/lite/opencl/cl_test.cc
paddle/fluid/lite/opencl/cl_test.cc
+85
-22
未找到文件。
paddle/fluid/lite/api/mobilenetv1_test.cc
浏览文件 @
fc3a66ae
...
@@ -61,7 +61,7 @@ void TestModel(const std::vector<Place>& valid_places,
...
@@ -61,7 +61,7 @@ void TestModel(const std::vector<Place>& valid_places,
3.13812525e-05
,
6.52209565e-05
,
4.78087313e-05
,
3.13812525e-05
,
6.52209565e-05
,
4.78087313e-05
,
2.58822285e-04
});
2.58822285e-04
});
for
(
int
i
=
0
;
i
<
results
.
size
();
++
i
)
{
for
(
int
i
=
0
;
i
<
results
.
size
();
++
i
)
{
EXPECT_NEAR
(
out
->
data
<
float
>
()[
i
],
results
[
i
],
1e-
5
);
EXPECT_NEAR
(
out
->
data
<
float
>
()[
i
],
results
[
i
],
1e-
6
);
}
}
ASSERT_EQ
(
out
->
dims
().
size
(),
2
);
ASSERT_EQ
(
out
->
dims
().
size
(),
2
);
ASSERT_EQ
(
out
->
dims
()[
0
],
1
);
ASSERT_EQ
(
out
->
dims
()[
0
],
1
);
...
...
paddle/fluid/lite/core/context.h
浏览文件 @
fc3a66ae
...
@@ -236,12 +236,15 @@ class Context<TargetType::kOpenCL> {
...
@@ -236,12 +236,15 @@ class Context<TargetType::kOpenCL> {
void
CopySharedTo
(
const
OpenClContext
*
ctx
)
{
void
CopySharedTo
(
const
OpenClContext
*
ctx
)
{
ctx
->
cl_context_
=
cl_context_
;
ctx
->
cl_context_
=
cl_context_
;
ctx
->
cl_helper_
=
cl_helper_
;
}
}
private:
private:
void
PrepareKernels
()
{
void
PrepareKernels
()
{
cl_helper_
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
cl_helper_
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
cl_helper_
->
AddKernel
(
"channel_add"
,
"channel_add_kernel.cl"
);
cl_helper_
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
cl_helper_
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
cl_helper_
->
AddKernel
(
"pool_avg"
,
"pool_kernel.cl"
);
}
}
};
};
#endif
#endif
...
...
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc
浏览文件 @
fc3a66ae
...
@@ -31,10 +31,10 @@ class ElementwiseAddCompute
...
@@ -31,10 +31,10 @@ class ElementwiseAddCompute
void
Run
()
override
{
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
OpenClContext
>
();
auto
&
context
=
ctx_
->
As
<
OpenClContext
>
();
CHECK
(
context
.
cl_
context
()
);
CHECK
(
context
.
cl_
helper
()
!=
nullptr
);
elementwise_add
(
elementwise_add
(
context
.
cl_
context
(),
static_cast
<
const
float
*>
(
param
.
X
->
raw_data
()),
context
.
cl_
helper
(),
static_cast
<
const
float
*>
(
param
.
X
->
raw_data
()),
param
.
X
->
dims
(),
static_cast
<
const
float
*>
(
param
.
Y
->
raw_data
()),
param
.
X
->
dims
(),
static_cast
<
const
float
*>
(
param
.
Y
->
raw_data
()),
param
.
Y
->
dims
(),
param
.
Out
->
mutable_data
<
float
>
(),
param
.
Out
->
dims
());
param
.
Y
->
dims
(),
param
.
Out
->
mutable_data
<
float
>
(),
param
.
Out
->
dims
());
}
}
...
...
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
浏览文件 @
fc3a66ae
...
@@ -40,9 +40,9 @@ TEST(elementwise_add, init) {
...
@@ -40,9 +40,9 @@ TEST(elementwise_add, init) {
kernel
->
SetParam
(
param
);
kernel
->
SetParam
(
param
);
kernel
->
SetContext
(
std
::
move
(
context
));
kernel
->
SetContext
(
std
::
move
(
context
));
X
.
Resize
({
1
,
10
});
X
.
Resize
({
1
,
1
,
1
,
1
0
});
Y
.
Resize
({
1
,
10
});
Y
.
Resize
({
1
,
1
,
1
,
1
0
});
Out
.
Resize
({
1
,
10
});
Out
.
Resize
({
1
,
1
,
1
,
1
0
});
auto
*
x_data
=
X
.
mutable_data
<
float
>
();
auto
*
x_data
=
X
.
mutable_data
<
float
>
();
auto
*
y_data
=
Y
.
mutable_data
<
float
>
();
auto
*
y_data
=
Y
.
mutable_data
<
float
>
();
...
@@ -56,7 +56,7 @@ TEST(elementwise_add, init) {
...
@@ -56,7 +56,7 @@ TEST(elementwise_add, init) {
kernel
->
Launch
();
kernel
->
Launch
();
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
3.4
*
i
,
1e-
1
);
EXPECT_NEAR
(
out_data
[
i
],
3.4
*
i
,
1e-
6
);
}
}
}
}
...
...
paddle/fluid/lite/opencl/CMakeLists.txt
浏览文件 @
fc3a66ae
...
@@ -5,13 +5,11 @@ endif()
...
@@ -5,13 +5,11 @@ endif()
cc_library
(
cl_wrapper SRCS cl_wrapper.cc
)
cc_library
(
cl_wrapper SRCS cl_wrapper.cc
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
cc_library
(
cl_half SRCS cl_half.cc
)
target_compile_options
(
cl_half BEFORE PUBLIC -fno-strict-aliasing
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS
cl_half
lite_tensor
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS lite_tensor
)
cc_library
(
cl_image SRCS cl_image.cc DEPS
cl_half
lite_tensor cl_image_converter cl_engine
)
cc_library
(
cl_image SRCS cl_image.cc DEPS lite_tensor cl_image_converter cl_engine
)
cc_library
(
cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image
)
cc_library
(
cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image
)
lite_cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper
)
lite_cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper
)
add_dependencies
(
cl_tool opencl_clhpp
)
add_dependencies
(
cl_tool opencl_clhpp
)
paddle/fluid/lite/opencl/cl_caller.cc
浏览文件 @
fc3a66ae
...
@@ -15,7 +15,6 @@ limitations under the License. */
...
@@ -15,7 +15,6 @@ limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <string>
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
...
@@ -23,16 +22,17 @@ limitations under the License. */
...
@@ -23,16 +22,17 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
static
void
CopyImageData
(
const
CLImage
&
cl_image
,
float
*
out
)
{
static
void
CopyImageData
(
CLHelper
*
helper
,
const
CLImage
&
cl_image
,
float
*
out
)
{
int
width
=
cl_image
.
image_dims
()[
0
];
int
width
=
cl_image
.
image_dims
()[
0
];
int
height
=
cl_image
.
image_dims
()[
1
];
int
height
=
cl_image
.
image_dims
()[
1
];
half_t
*
image_data
=
new
half_
t
[
height
*
width
*
4
];
float
*
image_data
=
new
floa
t
[
height
*
width
*
4
];
cl
::
Image
*
image
=
cl_image
.
cl_image
();
cl
::
Image
*
image
=
cl_image
.
cl_image
();
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
static_cast
<
size_t
>
(
height
),
1
};
static_cast
<
size_t
>
(
height
),
1
};
cl_int
err
=
CLEngine
::
Global
()
->
command_q
ueue
().
enqueueReadImage
(
cl_int
err
=
helper
->
OpenCLCommandQ
ueue
().
enqueueReadImage
(
*
image
,
CL_TRUE
,
origin
,
region
,
0
,
0
,
image_data
,
nullptr
,
nullptr
);
*
image
,
CL_TRUE
,
origin
,
region
,
0
,
0
,
image_data
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
err
);
CL_CHECK_ERRORS
(
err
);
...
@@ -49,22 +49,25 @@ bool InitOpenCLEngine(std::string cl_path) {
...
@@ -49,22 +49,25 @@ bool InitOpenCLEngine(std::string cl_path) {
return
engine
->
IsInitSuccess
();
return
engine
->
IsInitSuccess
();
}
}
void
elementwise_add
(
CL
Context
*
context
,
const
float
*
in
,
const
DDim
&
in_dim
,
void
elementwise_add
(
CL
Helper
*
helper
,
const
float
*
in
,
const
DDim
&
in_dim
,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
)
{
const
DDim
&
out_dim
)
{
CLHelper
helper
(
context
);
if
(
!
(
bias_dim
.
size
()
==
1
||
bias_dim
.
size
()
==
4
))
{
helper
.
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
LOG
(
FATAL
)
<<
"Error: bias dims is error"
;
auto
kernel
=
helper
.
GetKernel
(
0
);
return
;
}
auto
kernel
=
bias_dim
.
size
()
==
1
?
helper
->
GetKernel
(
"channel_add"
)
:
helper
->
GetKernel
(
"elementwise_add"
);
CLImage
in_image
;
CLImage
in_image
;
in_image
.
set_tensor_data
(
in
,
in_dim
);
in_image
.
set_tensor_data
(
in
,
in_dim
);
in_image
.
InitNormalCLImage
(
helper
.
OpenCLContext
());
in_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
VLOG
(
3
)
<<
" --- Inpu image: "
<<
in_image
<<
" --- "
;
VLOG
(
3
)
<<
" --- Inpu image: "
<<
in_image
<<
" --- "
;
CLImage
bias_image
;
CLImage
bias_image
;
bias_image
.
set_tensor_data
(
bias
,
bias_dim
);
bias_image
.
set_tensor_data
(
bias
,
bias_dim
);
bias_image
.
Init
NormalCLImage
(
helper
.
OpenCLContext
());
bias_image
.
Init
CLImage
(
helper
->
OpenCLContext
());
VLOG
(
3
)
<<
" --- Bias image: "
<<
bias_image
<<
" --- "
;
VLOG
(
3
)
<<
" --- Bias image: "
<<
bias_image
<<
" --- "
;
CLImage
out_image
;
CLImage
out_image
;
out_image
.
InitEmptyImage
(
helper
.
OpenCLContext
(),
out_dim
);
out_image
.
InitEmptyImage
(
helper
->
OpenCLContext
(),
out_dim
);
cl_int
status
;
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
*
in_image
.
cl_image
());
status
=
kernel
.
setArg
(
0
,
*
in_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
CL_CHECK_ERRORS
(
status
);
...
@@ -72,16 +75,23 @@ void elementwise_add(CLContext* context, const float* in, const DDim& in_dim,
...
@@ -72,16 +75,23 @@ void elementwise_add(CLContext* context, const float* in, const DDim& in_dim,
CL_CHECK_ERRORS
(
status
);
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_image
.
cl_image
());
status
=
kernel
.
setArg
(
2
,
*
out_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
CL_CHECK_ERRORS
(
status
);
if
(
bias_dim
.
size
()
==
1
)
{
int
tensor_w
=
in_dim
[
3
];
status
=
kernel
.
setArg
(
3
,
tensor_w
);
CL_CHECK_ERRORS
(
status
);
}
size_t
width
=
in_image
.
ImageWidth
();
size_t
width
=
in_image
.
ImageWidth
();
size_t
height
=
in_image
.
ImageHeight
();
size_t
height
=
in_image
.
ImageHeight
();
auto
global_work_size
=
cl
::
NDRange
{
width
,
height
};
auto
global_work_size
=
cl
::
NDRange
{
width
,
height
};
status
=
helper
.
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
status
=
helper
->
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
nullptr
);
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
status
);
CL_CHECK_ERRORS
(
status
);
status
=
helper
->
OpenCLCommandQueue
().
finish
();
CL_CHECK_ERRORS
(
status
);
VLOG
(
3
)
<<
" --- Out image: "
<<
out_image
<<
" --- "
;
VLOG
(
3
)
<<
" --- Out image: "
<<
out_image
<<
" --- "
;
CopyImageData
(
helper
,
out_image
,
out
);
CopyImageData
(
out_image
,
out
);
}
}
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/opencl/cl_caller.h
浏览文件 @
fc3a66ae
...
@@ -16,7 +16,7 @@ limitations under the License. */
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include <string>
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_
context
.h"
#include "paddle/fluid/lite/opencl/cl_
helper
.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -27,7 +27,7 @@ bool InitOpenCLEngine(std::string cl_path);
...
@@ -27,7 +27,7 @@ bool InitOpenCLEngine(std::string cl_path);
/// black box so that the framework can remain simple.
/// black box so that the framework can remain simple.
/// NOTE Currently, these methods are quite expensive, we will optimize them
/// NOTE Currently, these methods are quite expensive, we will optimize them
/// latter.
/// latter.
void
elementwise_add
(
CL
Context
*
context
,
const
float
*
in
,
const
DDim
&
in_dim
,
void
elementwise_add
(
CL
Helper
*
helper
,
const
float
*
in
,
const
DDim
&
in_dim
,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
);
const
DDim
&
out_dim
);
...
...
paddle/fluid/lite/opencl/cl_engine.cc
浏览文件 @
fc3a66ae
...
@@ -156,8 +156,7 @@ bool CLEngine::InitializeDevice() {
...
@@ -156,8 +156,7 @@ bool CLEngine::InitializeDevice() {
if
(
ext_data
.
find
(
"cl_khr_fp16"
)
!=
std
::
string
::
npos
)
{
if
(
ext_data
.
find
(
"cl_khr_fp16"
)
!=
std
::
string
::
npos
)
{
LOG
(
INFO
)
<<
"The chosen device supports the half data type."
;
LOG
(
INFO
)
<<
"The chosen device supports the half data type."
;
}
else
{
}
else
{
LOG
(
ERROR
)
<<
"The chosen device doesn't support the half data type!"
;
LOG
(
INFO
)
<<
"The chosen device doesn't support the half data type!"
;
return
false
;
}
}
auto
max_units
=
device_
->
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
auto
max_units
=
device_
->
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
LOG
(
INFO
)
<<
"The chosen device has "
<<
max_units
<<
" compute units."
;
LOG
(
INFO
)
<<
"The chosen device has "
<<
max_units
<<
" compute units."
;
...
...
paddle/fluid/lite/opencl/cl_half.cc
已删除
100644 → 0
浏览文件 @
679aabac
此差异已折叠。
点击以展开。
paddle/fluid/lite/opencl/cl_image.cc
浏览文件 @
fc3a66ae
...
@@ -16,7 +16,6 @@ limitations under the License. */
...
@@ -16,7 +16,6 @@ limitations under the License. */
#include <glog/logging.h>
#include <glog/logging.h>
#include <array>
#include <array>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -26,7 +25,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
...
@@ -26,7 +25,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
int
width
=
cl_image
.
image_dims_
[
0
];
int
width
=
cl_image
.
image_dims_
[
0
];
int
height
=
cl_image
.
image_dims_
[
1
];
int
height
=
cl_image
.
image_dims_
[
1
];
half_t
*
image_data
=
new
half_
t
[
height
*
width
*
4
];
float
*
image_data
=
new
floa
t
[
height
*
width
*
4
];
cl
::
Image
*
image
=
cl_image
.
cl_image
();
cl
::
Image
*
image
=
cl_image
.
cl_image
();
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
...
@@ -131,9 +130,9 @@ void CLImage::InitCLImage(const cl::Context& context,
...
@@ -131,9 +130,9 @@ void CLImage::InitCLImage(const cl::Context& context,
image_dims_
=
converter
->
InitImageDimInfoWith
(
tensor_dims_
);
image_dims_
=
converter
->
InitImageDimInfoWith
(
tensor_dims_
);
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
half_t
*
image_data
=
new
half_
t
[
image_dims_
.
product
()
*
4
];
float
*
image_data
=
new
floa
t
[
image_dims_
.
product
()
*
4
];
#else
#else
half_t
*
image_data
=
new
half_
t
[
image_dims_
.
production
()
*
4
];
float
*
image_data
=
new
floa
t
[
image_dims_
.
production
()
*
4
];
#endif
#endif
VLOG
(
3
)
<<
" convert to image "
;
VLOG
(
3
)
<<
" convert to image "
;
...
@@ -151,7 +150,7 @@ void CLImage::InitCLImage(const cl::Context& context,
...
@@ -151,7 +150,7 @@ void CLImage::InitCLImage(const cl::Context& context,
void
CLImage
::
InitCLImage
(
const
cl
::
Context
&
context
,
int
width
,
int
height
,
void
CLImage
::
InitCLImage
(
const
cl
::
Context
&
context
,
int
width
,
int
height
,
void
*
data
)
{
void
*
data
)
{
cl
::
ImageFormat
img_format
(
CL_RGBA
,
CL_
HALF_
FLOAT
);
cl
::
ImageFormat
img_format
(
CL_RGBA
,
CL_FLOAT
);
cl_int
err
;
cl_int
err
;
cl_image_
.
reset
(
new
cl
::
Image2D
(
cl_image_
.
reset
(
new
cl
::
Image2D
(
context
,
CL_MEM_READ_WRITE
|
(
data
?
CL_MEM_COPY_HOST_PTR
:
0
),
context
,
CL_MEM_READ_WRITE
|
(
data
?
CL_MEM_COPY_HOST_PTR
:
0
),
...
...
paddle/fluid/lite/opencl/cl_image_converter.cc
浏览文件 @
fc3a66ae
...
@@ -36,7 +36,7 @@ DDim CLImageConverterDefault::InitImageDimInfoWith(const DDim &tensor_dim) {
...
@@ -36,7 +36,7 @@ DDim CLImageConverterDefault::InitImageDimInfoWith(const DDim &tensor_dim) {
static_cast
<
DDim
::
value_type
>
(
height
)}));
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
}
void
CLImageConverterDefault
::
NCHWToImage
(
float
*
nchw
,
half_
t
*
image
,
void
CLImageConverterDefault
::
NCHWToImage
(
float
*
nchw
,
floa
t
*
image
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
...
@@ -68,7 +68,7 @@ void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
...
@@ -68,7 +68,7 @@ void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
if
(
c
<
C
)
{
if
(
c
<
C
)
{
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
// (c % 4);
image
[
i2
]
=
Float2Half
(
*
p
)
;
image
[
i2
]
=
*
p
;
i2
+=
4
;
i2
+=
4
;
p
++
;
p
++
;
}
else
{
}
else
{
...
@@ -83,7 +83,7 @@ void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
...
@@ -83,7 +83,7 @@ void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
}
}
}
}
void
CLImageConverterDefault
::
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
void
CLImageConverterDefault
::
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
...
@@ -107,7 +107,7 @@ void CLImageConverterDefault::ImageToNCHW(half_t *image, float *tensor,
...
@@ -107,7 +107,7 @@ void CLImageConverterDefault::ImageToNCHW(half_t *image, float *tensor,
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
*
p
=
Half2Float
(
image
[
i2
])
;
*
p
=
image
[
i2
]
;
i2
+=
4
;
i2
+=
4
;
p
++
;
p
++
;
}
}
...
@@ -161,7 +161,7 @@ DDim CLImageConverterFolder::InitImageDimInfoWith(const DDim &tensor_dim) {
...
@@ -161,7 +161,7 @@ DDim CLImageConverterFolder::InitImageDimInfoWith(const DDim &tensor_dim) {
}
}
}
}
void
CLImageConverterFolder
::
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
void
CLImageConverterFolder
::
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
<<
" Tensor dim is not support!"
;
<<
" Tensor dim is not support!"
;
...
@@ -184,14 +184,13 @@ void CLImageConverterFolder::NCHWToImage(float *tensor, half_t *image,
...
@@ -184,14 +184,13 @@ void CLImageConverterFolder::NCHWToImage(float *tensor, half_t *image,
for
(
size_t
h
=
0
;
h
<
tdim
[
0
];
h
++
)
{
for
(
size_t
h
=
0
;
h
<
tdim
[
0
];
h
++
)
{
for
(
size_t
w
=
0
;
w
<
tdim
[
1
];
w
++
)
{
for
(
size_t
w
=
0
;
w
<
tdim
[
1
];
w
++
)
{
image
[(
h
*
width
+
w
/
4
)
*
4
+
(
w
%
4
)]
=
image
[(
h
*
width
+
w
/
4
)
*
4
+
(
w
%
4
)]
=
tensor
[
h
*
tdim
[
1
]
+
w
];
Float2Half
(
tensor
[
h
*
tdim
[
1
]
+
w
]);
}
}
}
}
}
}
}
}
void
CLImageConverterFolder
::
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
void
CLImageConverterFolder
::
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
if
(
tensor_dim
.
size
()
>
2
)
{
if
(
tensor_dim
.
size
()
>
2
)
{
...
@@ -213,7 +212,7 @@ void CLImageConverterFolder::ImageToNCHW(half_t *image, float *tensor,
...
@@ -213,7 +212,7 @@ void CLImageConverterFolder::ImageToNCHW(half_t *image, float *tensor,
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
p
[
h
*
W
+
w
]
=
Half2Float
(
image
[(
h
*
width
+
w
/
4
)
*
4
+
(
w
%
4
)])
;
p
[
h
*
W
+
w
]
=
image
[(
h
*
width
+
w
/
4
)
*
4
+
(
w
%
4
)]
;
}
}
}
}
}
}
...
@@ -233,7 +232,7 @@ DDim CLImageConverterNWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
...
@@ -233,7 +232,7 @@ DDim CLImageConverterNWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
static_cast
<
DDim
::
value_type
>
(
height
)}));
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
}
void
CLImageConverterNWBlock
::
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
void
CLImageConverterNWBlock
::
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
auto
image_dim
=
InitImageDimInfoWith
(
tensor_dim
);
auto
image_dim
=
InitImageDimInfoWith
(
tensor_dim
);
...
@@ -253,7 +252,7 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
...
@@ -253,7 +252,7 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
size_t
index
=
4
*
c
*
(
width
*
H
)
+
4
*
h
*
width
+
4
*
W
*
(
n
/
4
)
+
size_t
index
=
4
*
c
*
(
width
*
H
)
+
4
*
h
*
width
+
4
*
W
*
(
n
/
4
)
+
w
*
4
+
n
%
4
;
w
*
4
+
n
%
4
;
if
(
n
<
N
)
{
if
(
n
<
N
)
{
image
[
index
]
=
Float2Half
(
*
p
)
;
image
[
index
]
=
*
p
;
p
++
;
p
++
;
}
else
{
}
else
{
image
[
index
]
=
0.0
;
image
[
index
]
=
0.0
;
...
@@ -268,7 +267,7 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
...
@@ -268,7 +267,7 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
VLOG
(
3
)
<<
" init done"
;
VLOG
(
3
)
<<
" init done"
;
}
}
void
CLImageConverterNWBlock
::
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
void
CLImageConverterNWBlock
::
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
...
@@ -286,7 +285,7 @@ void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
...
@@ -286,7 +285,7 @@ void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
for
(
size_t
w
=
0
;
w
<
W
;
++
w
)
{
for
(
size_t
w
=
0
;
w
<
W
;
++
w
)
{
size_t
index
=
4
*
c
*
(
width
*
H
)
+
4
*
h
*
width
+
4
*
W
*
(
n
/
4
)
+
size_t
index
=
4
*
c
*
(
width
*
H
)
+
4
*
h
*
width
+
4
*
W
*
(
n
/
4
)
+
w
*
4
+
n
%
4
;
w
*
4
+
n
%
4
;
*
p
=
Half2Float
(
image
[
index
])
;
*
p
=
image
[
index
]
;
p
++
;
p
++
;
if
(
index
>=
(
width
*
height
*
4
))
{
if
(
index
>=
(
width
*
height
*
4
))
{
LOG
(
INFO
)
<<
" index out of range "
;
LOG
(
INFO
)
<<
" index out of range "
;
...
@@ -312,7 +311,7 @@ DDim CLImageConverterDWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
...
@@ -312,7 +311,7 @@ DDim CLImageConverterDWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
static_cast
<
DDim
::
value_type
>
(
height
)}));
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
}
void
CLImageConverterDWBlock
::
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
void
CLImageConverterDWBlock
::
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
...
@@ -344,7 +343,7 @@ void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
...
@@ -344,7 +343,7 @@ void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
if
(
c
<
C
)
{
if
(
c
<
C
)
{
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
// (c % 4);
image
[
i2
]
=
Float2Half
(
*
p
)
;
image
[
i2
]
=
*
p
;
i2
+=
4
;
i2
+=
4
;
p
++
;
p
++
;
}
else
{
}
else
{
...
@@ -359,7 +358,7 @@ void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
...
@@ -359,7 +358,7 @@ void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
}
}
}
}
void
CLImageConverterDWBlock
::
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
void
CLImageConverterDWBlock
::
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
...
@@ -377,7 +376,7 @@ void CLImageConverterDWBlock::ImageToNCHW(half_t *image, float *tensor,
...
@@ -377,7 +376,7 @@ void CLImageConverterDWBlock::ImageToNCHW(half_t *image, float *tensor,
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
*
p
=
Half2Float
(
image
[
i2
])
;
*
p
=
image
[
i2
]
;
i2
+=
4
;
i2
+=
4
;
p
++
;
p
++
;
}
}
...
@@ -410,7 +409,7 @@ DDim CLImageConverterNormal::InitImageDimInfoWith(const DDim &tensor_dim) {
...
@@ -410,7 +409,7 @@ DDim CLImageConverterNormal::InitImageDimInfoWith(const DDim &tensor_dim) {
static_cast
<
DDim
::
value_type
>
(
height
)}));
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
}
void
CLImageConverterNormal
::
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
void
CLImageConverterNormal
::
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
<<
" Tensor dim is not support!"
;
<<
" Tensor dim is not support!"
;
...
@@ -419,7 +418,7 @@ void CLImageConverterNormal::NCHWToImage(float *tensor, half_t *image,
...
@@ -419,7 +418,7 @@ void CLImageConverterNormal::NCHWToImage(float *tensor, half_t *image,
default_converter
.
NCHWToImage
(
tensor
,
image
,
tensor_dim
);
default_converter
.
NCHWToImage
(
tensor
,
image
,
tensor_dim
);
}
}
void
CLImageConverterNormal
::
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
void
CLImageConverterNormal
::
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
const
DDim
&
tensor_dim
)
{
CLImageConverterDefault
default_converter
;
CLImageConverterDefault
default_converter
;
...
@@ -439,10 +438,10 @@ DDim CLImageConverterWinoTransWeight::InitImageDimInfoWith(
...
@@ -439,10 +438,10 @@ DDim CLImageConverterWinoTransWeight::InitImageDimInfoWith(
static_cast
<
DDim
::
value_type
>
(
height
)}));
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
}
void
CLImageConverterWinoTransWeight
::
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
void
CLImageConverterWinoTransWeight
::
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
)
{}
const
DDim
&
tensor_dim
)
{}
void
CLImageConverterWinoTransWeight
::
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
void
CLImageConverterWinoTransWeight
::
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{}
const
DDim
&
tensor_dim
)
{}
...
...
paddle/fluid/lite/opencl/cl_image_converter.h
浏览文件 @
fc3a66ae
...
@@ -15,7 +15,6 @@ limitations under the License. */
...
@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once
#pragma once
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -24,10 +23,10 @@ class CLImageConverterBase {
...
@@ -24,10 +23,10 @@ class CLImageConverterBase {
public:
public:
virtual
~
CLImageConverterBase
()
{}
virtual
~
CLImageConverterBase
()
{}
virtual
void
NCHWToImage
(
float
*
nchw
,
half_
t
*
image
,
virtual
void
NCHWToImage
(
float
*
nchw
,
floa
t
*
image
,
const
DDim
&
tensor_dim
)
=
0
;
const
DDim
&
tensor_dim
)
=
0
;
virtual
void
ImageToNCHW
(
half_
t
*
image
,
float
*
nchw
,
const
DDim
&
image_dim
,
virtual
void
ImageToNCHW
(
floa
t
*
image
,
float
*
nchw
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
=
0
;
const
DDim
&
tensor_dim
)
=
0
;
virtual
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
=
0
;
virtual
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
=
0
;
};
};
...
@@ -35,16 +34,16 @@ class CLImageConverterBase {
...
@@ -35,16 +34,16 @@ class CLImageConverterBase {
class
CLImageConverterDefault
:
public
CLImageConverterBase
{
class
CLImageConverterDefault
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
nchw
,
half_
t
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
nchw
,
floa
t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
);
};
};
class
CLImageConverterFolder
:
public
CLImageConverterBase
{
class
CLImageConverterFolder
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
);
/*
/*
...
@@ -68,8 +67,8 @@ class CLImageConverterFolder : public CLImageConverterBase {
...
@@ -68,8 +67,8 @@ class CLImageConverterFolder : public CLImageConverterBase {
class
CLImageConverterNormal
:
public
CLImageConverterBase
{
class
CLImageConverterNormal
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
);
/*
/*
...
@@ -92,22 +91,22 @@ class CLImageConverterNormal : public CLImageConverterBase {
...
@@ -92,22 +91,22 @@ class CLImageConverterNormal : public CLImageConverterBase {
class
CLImageConverterNWBlock
:
public
CLImageConverterBase
{
class
CLImageConverterNWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
);
};
};
class
CLImageConverterDWBlock
:
public
CLImageConverterBase
{
class
CLImageConverterDWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
);
};
};
class
CLImageConverterWinoTransWeight
:
public
CLImageConverterBase
{
class
CLImageConverterWinoTransWeight
:
public
CLImageConverterBase
{
public:
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_
t
*
image
,
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
floa
t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
void
ImageToNCHW
(
floa
t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
const
DDim
&
tensor_dim
);
};
};
...
...
paddle/fluid/lite/opencl/cl_
half.h
→
paddle/fluid/lite/opencl/cl_
kernel/channel_add_kernel.cl
浏览文件 @
fc3a66ae
...
@@ -12,21 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,21 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See
the
License
for
the
specific
language
governing
permissions
and
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
limitations
under
the
License.
*/
#pragma once
__kernel
void
channel_add
(
__read_only
image2d_t
input,
__read_only
image2d_t
bias,
__write_only
image2d_t
outputImage,
__private
const
int
w
)
{
#include <cstdint>
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
namespace
paddle
{
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
namespace
lite
{
int2
coords
;
coords.x
=
x
;
typedef
uint16_t
half_t
;
coords.y
=
y
;
int2
coords_bias
;
half_t
Float2Half
(
float
f
);
coords_bias.x
=
x/w
;
coords_bias.y
=
0
;
float
Half2Float
(
half_t
h
);
float4
in
=
read_imagef
(
input,
sampler,
coords
)
;
float4
biase
=
read_imagef
(
bias,
sampler,
coords_bias
)
;
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
);
float4
output
=
in
+
biase
;
write_imagef
(
outputImage,
coords,
output
)
;
void
HalfArray2FloatArray
(
half_t
*
h_array
,
float
*
f_array
,
int
count
);
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
浏览文件 @
fc3a66ae
...
@@ -14,21 +14,19 @@ limitations under the License. */
...
@@ -14,21 +14,19 @@ limitations under the License. */
#pragma once
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
inline
float4
activation
(
float4
in
inline
half4
activation
(
half4
in
#ifdef PRELU
#ifdef PRELU
,
,
half
4
prelu_alpha
float
4
prelu_alpha
#endif
#endif
)
{
)
{
half
4
output
;
float
4
output
;
#ifdef PRELU
#ifdef PRELU
output
=
select
(
prelu_alpha
*
in
,
in
,
in
>=
(
half
4
)
0
.
0
);
output
=
select
(
prelu_alpha
*
in
,
in
,
in
>=
(
float
4
)
0
.
0
);
#endif
#endif
#ifdef RELU
#ifdef RELU
output
=
fmax
(
in
,
(
half
4
)(
0
.
0
f
));
output
=
fmax
(
in
,
(
float
4
)(
0
.
0
f
));
#endif
#endif
return
output
;
return
output
;
}
}
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
浏览文件 @
fc3a66ae
...
@@ -12,16 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,16 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See
the
License
for
the
specific
language
governing
permissions
and
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
limitations
under
the
License.
*/
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__kernel
void
elementwise_add
(
__read_only
image2d_t
input,
__read_only
image2d_t
bias,
__write_only
image2d_t
outputImage
)
{
__kernel
void
elementwise_add
(
__global
image2d_t
input,
__global
image2d_t
bias,__write_only
image2d_t
outputImage
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int2
coords
;
int2
coords
;
coords.x
=
x
;
coords.x
=
x
;
coords.y
=
y
;
coords.y
=
y
;
half4
in
=
read_imageh
(
input,
sampler,
coords
)
;
float4
in
=
read_imagef
(
input,
sampler,
coords
)
;
half4
biase
=
read_imageh
(
bias,
sampler,
coords
)
;
float4
biase
=
read_imagef
(
bias,
sampler,
coords
)
;
half
4
output
=
in
+
biase
;
float
4
output
=
in
+
biase
;
write_image
h
(
outputImage,coords,output
)
;
write_image
f
(
outputImage,coords,output
)
;
}
}
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
浏览文件 @
fc3a66ae
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See
the
License
for
the
specific
language
governing
permissions
and
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
limitations
under
the
License.
*/
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
MIN_VALUE
-FLT_MAX
#
define
MIN_VALUE
-FLT_MAX
__kernel
void
pool_max
(
__kernel
void
pool_max
(
...
@@ -41,16 +40,16 @@ __kernel void pool_max(
...
@@ -41,16 +40,16 @@ __kernel void pool_max(
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
const
int
pos_in_y
=
out_n
*
in_height
;
half4
max_value
=
(
half
4
)(
MIN_VALUE
)
;
float4
max_value
=
(
float
4
)(
MIN_VALUE
)
;
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
half4
tmp
=
read_imageh
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
float4
tmp
=
read_imagef
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
max_value
=
max
(
max_value,
tmp
)
;
max_value
=
max
(
max_value,
tmp
)
;
}
}
}
}
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
write_image
h
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
max_value
)
;
write_image
f
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
max_value
)
;
}
}
__kernel
void
pool_avg
(
__kernel
void
pool_avg
(
...
@@ -77,15 +76,15 @@ __kernel void pool_avg(
...
@@ -77,15 +76,15 @@ __kernel void pool_avg(
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
const
int
pos_in_y
=
out_n
*
in_height
;
half4
sum
=
(
half
4
)(
0.0f
)
;
float4
sum
=
(
float
4
)(
0.0f
)
;
int
num
=
0
;
int
num
=
0
;
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
sum
+=
read_image
h
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
sum
+=
read_image
f
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
num++
;
num++
;
}
}
}
}
half
4
avg
=
sum
/
num
;
float
4
avg
=
sum
/
num
;
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
write_image
h
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
avg
)
;
write_image
f
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
avg
)
;
}
}
paddle/fluid/lite/opencl/cl_test.cc
浏览文件 @
fc3a66ae
...
@@ -67,28 +67,28 @@ TEST(cl_test, kernel_test) {
...
@@ -67,28 +67,28 @@ TEST(cl_test, kernel_test) {
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
auto
kernel
=
helper
->
GetKernel
(
2
);
auto
kernel
=
helper
->
GetKernel
(
2
);
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
1024
*
512
]);
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
4
*
3
*
256
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
*
3
*
256
*
512
;
i
++
)
{
in_data
[
i
]
=
1.
f
;
in_data
[
i
]
=
1.
f
;
}
}
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
3
,
256
,
512
});
CLImage
in_image
;
CLImage
in_image
;
in_image
.
set_tensor_data
(
in_data
.
get
(),
in_dim
);
in_image
.
set_tensor_data
(
in_data
.
get
(),
in_dim
);
in_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
in_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
LOG
(
INFO
)
<<
in_image
;
LOG
(
INFO
)
<<
in_image
;
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
1024
*
512
]);
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
4
*
3
*
256
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
*
3
*
256
*
512
;
i
++
)
{
bias_data
[
i
]
=
2.
f
;
bias_data
[
i
]
=
2.
f
;
}
}
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
3
,
256
,
512
});
CLImage
bias_image
;
CLImage
bias_image
;
bias_image
.
set_tensor_data
(
bias_data
.
get
(),
bias_dim
);
bias_image
.
set_tensor_data
(
bias_data
.
get
(),
bias_dim
);
bias_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
bias_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
LOG
(
INFO
)
<<
bias_image
;
LOG
(
INFO
)
<<
bias_image
;
CLImage
out_image
;
CLImage
out_image
;
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
3
,
256
,
512
});
out_image
.
InitEmptyImage
(
helper
->
OpenCLContext
(),
out_dim
);
out_image
.
InitEmptyImage
(
helper
->
OpenCLContext
(),
out_dim
);
LOG
(
INFO
)
<<
out_image
;
LOG
(
INFO
)
<<
out_image
;
...
@@ -108,7 +108,8 @@ TEST(cl_test, kernel_test) {
...
@@ -108,7 +108,8 @@ TEST(cl_test, kernel_test) {
status
=
helper
->
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
status
=
helper
->
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
&
event
);
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
&
event
);
CL_CHECK_ERRORS
(
status
);
CL_CHECK_ERRORS
(
status
);
status
=
helper
->
OpenCLCommandQueue
().
finish
();
CL_CHECK_ERRORS
(
status
);
double
start_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
();
double
start_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
();
double
stop_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
();
double
stop_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
();
double
elapsed_micros
=
(
stop_nanos
-
start_nanos
)
/
1000.0
;
double
elapsed_micros
=
(
stop_nanos
-
start_nanos
)
/
1000.0
;
...
@@ -116,37 +117,99 @@ TEST(cl_test, kernel_test) {
...
@@ -116,37 +117,99 @@ TEST(cl_test, kernel_test) {
LOG
(
INFO
)
<<
out_image
;
LOG
(
INFO
)
<<
out_image
;
}
}
TEST
(
cl_test
,
elementwise
_add_test
)
{
TEST
(
cl_test
,
channel
_add_test
)
{
std
::
default_random_engine
engine
;
std
::
default_random_engine
engine
;
std
::
uniform_real_distribution
<
float
>
dist
(
-
5
,
5
);
std
::
uniform_real_distribution
<
float
>
dist
(
-
5
,
5
);
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
16
,
256
,
512
});
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
1024
*
512
]);
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
4
*
16
*
256
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
in_data
[
i
]
=
dist
(
engine
);
in_data
[
i
]
=
dist
(
engine
);
}
}
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1
024
,
512
});
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1
6
});
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
1
024
*
512
]);
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
1
6
]);
for
(
int
i
=
0
;
i
<
1
024
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
1
6
;
i
++
)
{
bias_data
[
i
]
=
dist
(
engine
);
bias_data
[
i
]
=
dist
(
engine
);
}
}
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
std
::
unique_ptr
<
float
[]
>
out_ref
(
new
float
[
4
*
16
*
256
*
512
]);
std
::
unique_ptr
<
float
[]
>
out
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
for
(
int
j
=
0
;
j
<
16
;
j
++
)
{
float
b
=
bias_data
[
j
];
for
(
int
k
=
0
;
k
<
256
*
512
;
k
++
)
{
int
index
=
(
i
*
16
+
j
)
*
256
*
512
+
k
;
out_ref
[
index
]
=
in_data
[
index
]
+
b
;
}
}
}
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
16
,
256
,
512
});
std
::
unique_ptr
<
float
[]
>
out
(
new
float
[
4
*
16
*
256
*
512
]);
bool
status
=
InitOpenCLEngine
(
FLAGS_cl_path
);
bool
status
=
InitOpenCLEngine
(
FLAGS_cl_path
);
CHECK
(
status
)
<<
"Fail to initialize OpenCL engine."
;
CHECK
(
status
)
<<
"Fail to initialize OpenCL engine."
;
CLContext
context
;
std
::
unique_ptr
<
CLContext
>
context
(
new
CLContext
);
std
::
unique_ptr
<
CLHelper
>
helper
(
new
CLHelper
(
context
.
get
()));
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"channel_add"
,
"channel_add_kernel.cl"
);
elementwise_add
(
helper
.
get
(),
in_data
.
get
(),
in_dim
,
bias_data
.
get
(),
bias_dim
,
out
.
get
(),
out_dim
);
elementwise_add
(
&
context
,
in_data
.
get
(),
in_dim
,
bias_data
.
get
(),
bias_dim
,
int
stride
=
4
*
16
*
256
*
512
/
20
;
out
.
get
(),
out_dim
);
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
+=
stride
)
{
std
::
cout
<<
out
[
i
]
<<
" "
;
}
int
stride
=
1024
*
512
/
20
;
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
+=
stride
)
{
EXPECT_NEAR
(
out
[
i
],
out_ref
[
i
],
1e-6
);
}
std
::
cout
<<
std
::
endl
;
}
TEST
(
cl_test
,
elementwise_add_test
)
{
std
::
default_random_engine
engine
;
std
::
uniform_real_distribution
<
float
>
dist
(
-
5
,
5
);
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
16
,
256
,
512
});
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
4
*
16
*
256
*
512
]);
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
in_data
[
i
]
=
dist
(
engine
);
}
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
16
,
256
,
512
});
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
4
*
16
*
256
*
512
]);
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
bias_data
[
i
]
=
dist
(
engine
);
}
std
::
unique_ptr
<
float
[]
>
out_ref
(
new
float
[
4
*
16
*
256
*
512
]);
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
out_ref
[
i
]
=
in_data
[
i
]
+
bias_data
[
i
];
}
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
4
,
16
,
256
,
512
});
std
::
unique_ptr
<
float
[]
>
out
(
new
float
[
4
*
16
*
256
*
512
]);
bool
status
=
InitOpenCLEngine
(
FLAGS_cl_path
);
CHECK
(
status
)
<<
"Fail to initialize OpenCL engine."
;
std
::
unique_ptr
<
CLContext
>
context
(
new
CLContext
);
std
::
unique_ptr
<
CLHelper
>
helper
(
new
CLHelper
(
context
.
get
()));
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"channel_add"
,
"channel_add_kernel.cl"
);
elementwise_add
(
helper
.
get
(),
in_data
.
get
(),
in_dim
,
bias_data
.
get
(),
bias_dim
,
out
.
get
(),
out_dim
);
int
stride
=
4
*
16
*
256
*
512
/
20
;
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
+=
stride
)
{
std
::
cout
<<
out
[
i
]
<<
" "
;
std
::
cout
<<
out
[
i
]
<<
" "
;
}
}
for
(
int
i
=
0
;
i
<
4
*
16
*
256
*
512
;
i
++
)
{
EXPECT_NEAR
(
out
[
i
],
out_ref
[
i
],
1e-6
);
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录