Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
52e2d925
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
52e2d925
编写于
8月 17, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 17, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4615 [MS][LITE][GPU]transpose output support buffer
Merge pull request !4615 from chenzupeng/master-lite
上级
81833943
4d3be49a
变更
14
隐藏空白更改
内联
并排
Showing
14 changed files
with
134 additions
and
13 deletions
+134
-13
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/transpose.cl
...spore/lite/src/runtime/kernel/opencl/cl/fp32/transpose.cl
+42
-1
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
+10
-2
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
+1
-0
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
.../lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
+4
-4
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
...ite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
+2
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
...te/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
+5
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
...re/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
+8
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc
...st/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc
+9
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
...te/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
+8
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
...re/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
+9
-6
mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc
...te/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc
+9
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc
...e/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc
+9
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc
...lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc
+9
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
...lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
+9
-0
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/transpose.cl
浏览文件 @
52e2d925
...
...
@@ -3,7 +3,7 @@
#
define
READ_IMAGE
read_imagef
#
define
WRITE_IMAGE
write_imagef
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__kernel
void
transpose
(
__read_only
image2d_t
src_data,
__write_only
image2d_t
dst_data,
int2
HW,
int2
C
)
{
__kernel
void
transpose
_IMG
(
__read_only
image2d_t
src_data,
__write_only
image2d_t
dst_data,
int2
HW,
int2
C
)
{
int
X
=
get_global_id
(
0
)
;
int
Y
=
get_global_id
(
1
)
;
if
(
X
>=
HW.y
|
| Y >= C.y) {
...
...
@@ -43,3 +43,44 @@ __kernel void transpose(__read_only image2d_t src_data, __write_only image2d_t d
WRITE_IMAGE(dst_data, (int2)(X, 4 * Y + 2), result[2]);
WRITE_IMAGE(dst_data, (int2)(X, 4 * Y + 3), result[3]);
}
// Buffer-output variant of the transpose kernel: reads a 4x4 tile from a 2D
// image and stores its transpose into a linear FLT4 buffer.
//
// For work-item (id0, id1) the kernel reads the four pixels at image
// coordinates (id1, 4 * id0 + k), k = 0..3, and for each j = 0..3 writes the
// vector built from component j of those four pixels to
// dst_data[(4 * id1 + j) * HW.y + id0].
//
// Parameters:
//   src_data  source image, sampled through smp_zero.
//   dst_data  destination buffer of FLT4 elements.
//   HW        only HW.y is used: upper bound for id0 and the row stride (in
//             FLT4 elements) of dst_data.
//   C         only C.y is used: upper bound for id1.
//
// NOTE(review): presumably HW.y is H*W/4 and C.y is the number of 4-channel
// slices, making dst_data an NCHW buffer - confirm against the host code.
// NOTE(review): all four rows 4*id1 .. 4*id1+3 are written unconditionally; if
// the real channel count is not a multiple of 4 this may write past the end of
// dst_data - verify how the buffer is sized by the caller.
__kernel void transpose_BUF(__read_only image2d_t src_data, global FLT4 *dst_data, int2 HW, int2 C) {
  const int hw_blk = get_global_id(0);
  const int c_blk = get_global_id(1);
  // Work-items outside the valid range do nothing.
  if (!(hw_blk < HW.y && c_blk < C.y)) {
    return;
  }

  // Four consecutive image rows form the tile handled by this work-item.
  const int src_row = 4 * hw_blk;
  const FLT4 p0 = READ_IMAGE(src_data, smp_zero, (int2)(c_blk, src_row));
  const FLT4 p1 = READ_IMAGE(src_data, smp_zero, (int2)(c_blk, src_row + 1));
  const FLT4 p2 = READ_IMAGE(src_data, smp_zero, (int2)(c_blk, src_row + 2));
  const FLT4 p3 = READ_IMAGE(src_data, smp_zero, (int2)(c_blk, src_row + 3));

  // Row j of the output tile gathers component j of every input pixel.
  const int dst_row = 4 * c_blk;
  dst_data[dst_row * HW.y + hw_blk] = (FLT4)(p0.x, p1.x, p2.x, p3.x);
  dst_data[(dst_row + 1) * HW.y + hw_blk] = (FLT4)(p0.y, p1.y, p2.y, p3.y);
  dst_data[(dst_row + 2) * HW.y + hw_blk] = (FLT4)(p0.z, p1.z, p2.z, p3.z);
  dst_data[(dst_row + 3) * HW.y + hw_blk] = (FLT4)(p0.w, p1.w, p2.w, p3.w);
}
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
浏览文件 @
52e2d925
...
...
@@ -36,7 +36,11 @@ namespace mindspore::kernel {
int
TransposeOpenCLKernel
::
Init
()
{
std
::
string
kernel_name
=
"transpose"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
if
(
!
is_image_out_
)
{
kernel_name
+=
"_BUF"
;
}
else
{
kernel_name
+=
"_IMG"
;
}
#ifdef PROGRAM_WITH_IL
ocl_runtime
->
CreateKernelFromIL
(
kernel_
(),
kernel_name
);
#else
...
...
@@ -60,8 +64,12 @@ int TransposeOpenCLKernel::Init() {
MS_LOG
(
ERROR
)
<<
"input H * W % 4 != 0 not support!"
;
return
RET_ERROR
;
}
ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
// Transpose::InferShape just set output->SetFormat(input->GetFormat()); -^-!
ori_format_
=
schema
::
Format_NCHW
;
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NCHW
);
if
(
!
is_image_out_
)
{
out_mem_type_
=
OpenCLMemType
::
BUF
;
}
MS_LOG
(
DEBUG
)
<<
kernel_name
<<
" Init Done!"
;
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
浏览文件 @
52e2d925
...
...
@@ -38,6 +38,7 @@ class TransposeOpenCLKernel : public OpenCLKernel {
private:
cl
::
Kernel
kernel_
;
bool
is_image_out_
=
false
;
};
}
// namespace mindspore::kernel
...
...
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
浏览文件 @
52e2d925
...
...
@@ -34,13 +34,13 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
out_parameters
->
clear
();
out_convert_ops
->
clear
();
for
(
size_t
i
=
0
;
i
<
in_tensors
.
size
();
++
i
)
{
OpenCLKernel
*
cur_opencl_op
=
reinterpret_cast
<
OpenCLKernel
*>
(
in_kernels
[
i
]);
OpenCLKernel
*
cur_opencl_op
=
reinterpret_cast
<
OpenCLKernel
*>
(
in_kernels
[
i
]);
schema
::
Format
ori_format
=
cur_opencl_op
->
GetOriFormat
();
if
(
mem_type
==
cur_opencl_op
->
GetMemType
()
&&
in_tensors
[
i
]
->
GetFormat
()
==
ori_format
)
{
if
(
mem_type
==
OpenCLMemType
::
BUF
&&
mem_type
==
cur_opencl_op
->
GetMemType
()
&&
in_tensors
[
i
]
->
GetFormat
()
==
ori_format
)
{
continue
;
}
auto
dst_format
=
(
mem_type
==
OpenCLMemType
::
IMG
)
?
in_kernels
[
i
]
->
out_tensors
()[
0
]
->
GetFormat
()
:
ori_format
;
auto
dst_format
=
(
mem_type
==
OpenCLMemType
::
IMG
)
?
in_kernels
[
i
]
->
out_tensors
()[
0
]
->
GetFormat
()
:
ori_format
;
auto
src_format
=
(
mem_type
==
OpenCLMemType
::
IMG
)
?
in_tensors
[
i
]
->
GetFormat
()
:
in_kernels
[
i
]
->
out_tensors
()[
0
]
->
GetFormat
();
lite
::
tensor
::
Tensor
*
new_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
();
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
浏览文件 @
52e2d925
...
...
@@ -125,6 +125,7 @@ int RunSubGraphOpenCLKernel(const std::vector<lite::tensor::Tensor *> &inputs,
MS_LOG
(
ERROR
)
<<
"Run SubGraphOpenCLKernel error."
;
return
RET_ERROR
;
}
delete
sub_graph
;
return
RET_OK
;
}
...
...
@@ -180,6 +181,7 @@ TEST_F(TestActivationOpenCL, ActivationFp32_dim4) {
delete
input_tensor
;
delete
output_tensor
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
return
;
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
浏览文件 @
52e2d925
...
...
@@ -119,6 +119,11 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
}
printf
(
"test all close OK!
\n
"
);
lite
::
CompareOutputData
(
output_data
,
expect
,
4
);
delete
tensor_in
;
delete
tensor_out
;
delete
pooling_kernel
;
delete
pGraph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
浏览文件 @
52e2d925
...
...
@@ -175,6 +175,14 @@ TEST_F(TestConcatOpenCL, ConcatFp32_2input_dim4_axis3) {
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
output_data_gpu
,
output_data_cpu
.
data
(),
output_tensor
->
ElementsNum
(),
0.00001
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
concat_kernel
;
delete
sub_graph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc
浏览文件 @
52e2d925
...
...
@@ -108,5 +108,14 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
CompareOutputData
(
output_data
,
correct_data
,
oh
*
ow
*
co
,
0.00001
);
MS_LOG
(
INFO
)
<<
"Test Conv2dTransposeFp32 passed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
arith_kernel
;
delete
pGraph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
浏览文件 @
52e2d925
...
...
@@ -120,6 +120,14 @@ void TEST_MAIN(ConvParameter *param, schema::Format data_format, const std::stri
MyCompareOutput
(
output_tensor
,
expect_file
);
// lite::CompareOutput(reinterpret_cast<float *>(output_tensor->Data()), expect_file);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
conv_kernel
;
delete
sub_graph
;
mindspore
::
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
浏览文件 @
52e2d925
...
...
@@ -75,12 +75,15 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
// compare
CompareOutputData
(
output_data
,
correct_data
,
co
,
0.00001
);
delete
input_data
;
delete
weight_data
;
delete
tensor_x
;
delete
tensor_w
;
delete
tensor_out
;
delete
correct_data
;
MS_LOG
(
INFO
)
<<
"TestMatMulFp32 passed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
arith_kernel
;
delete
pGraph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc
浏览文件 @
52e2d925
...
...
@@ -92,6 +92,15 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
MS_LOG
(
INFO
)
<<
"compare result"
;
std
::
cout
<<
"compare result"
<<
std
::
endl
;
CompareOutput
(
output_tensor
,
expect_file
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
pooling_kernel
;
delete
pGraph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc
浏览文件 @
52e2d925
...
...
@@ -77,6 +77,15 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
MS_LOG
(
INFO
)
<<
"compare result"
;
std
::
cout
<<
"compare result"
<<
std
::
endl
;
CompareOutput
(
output_tensor
,
expect_file
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
kernel
;
delete
pGraph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
TEST_F
(
TestSoftmaxOpenCL
,
Softmax_1
)
{
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc
浏览文件 @
52e2d925
...
...
@@ -75,5 +75,14 @@ TEST_F(TestToFormatOpenCL, TransposeFp32) {
// compare
CompareOutputData
(
output_data
,
correct_data
,
h
*
w
*
c
,
0.00001
);
MS_LOG
(
INFO
)
<<
"TestMatMulFp32 passed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
arith_kernel
;
delete
pGraph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
浏览文件 @
52e2d925
...
...
@@ -75,5 +75,14 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
// compare
CompareOutputData
(
output_data
,
correct_data
,
h
*
w
*
c
,
0.00001
);
MS_LOG
(
INFO
)
<<
"TestMatMulFp32 passed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
arith_kernel
;
delete
pGraph
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录