Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
4cd94a44
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4cd94a44
编写于
8月 10, 2020
作者:
C
chenzupeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
transpose ouptut image2d
上级
767c04ef
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
43 addition
and
14 deletion
+43
-14
mindspore/lite/src/runtime/kernel/opencl/cl/fp16/transpose.cl
...spore/lite/src/runtime/kernel/opencl/cl/fp16/transpose.cl
+6
-5
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/transpose.cl
...spore/lite/src/runtime/kernel/opencl/cl/fp32/transpose.cl
+6
-5
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
...spore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
+3
-3
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
+15
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
+1
-1
mindspore/lite/test/st/benchmark_test.cc
mindspore/lite/test/st/benchmark_test.cc
+9
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
...lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
+3
-0
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/fp16/transpose.cl
浏览文件 @
4cd94a44
#
define
FLT
half
#
define
FLT
half
#
define
FLT4
half4
#
define
FLT4
half4
#
define
READ_IMAGE
read_imageh
#
define
READ_IMAGE
read_imageh
#
define
WRITE_IMAGE
write_imageh
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__kernel
void
transpose
(
__read_only
image2d_t
src_data,
__
global
float4
*
dst_data,
int2
HW,
int2
C
)
{
__kernel
void
transpose
(
__read_only
image2d_t
src_data,
__
write_only
image2d_t
dst_data,
int2
HW,
int2
C
)
{
int
X
=
get_global_id
(
0
)
;
int
X
=
get_global_id
(
0
)
;
int
Y
=
get_global_id
(
1
)
;
int
Y
=
get_global_id
(
1
)
;
if
(
X
>=
HW.y
||
Y
>=
C.y
)
{
if
(
X
>=
HW.y
||
Y
>=
C.y
)
{
...
@@ -37,8 +38,8 @@ __kernel void transpose(__read_only image2d_t src_data, __global float4 *dst_dat
...
@@ -37,8 +38,8 @@ __kernel void transpose(__read_only image2d_t src_data, __global float4 *dst_dat
result[3].z
=
x2.w
;
result[3].z
=
x2.w
;
result[3].w
=
x3.w
;
result[3].w
=
x3.w
;
dst_data[4
*
Y
*
HW.y
+
X]
=
result[0]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
)
,
result[0]
)
;
dst_data[
(
4
*
Y
+
1
)
*
HW.y
+
X]
=
result[1]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
+
1
)
,
result[1]
)
;
dst_data[
(
4
*
Y
+
2
)
*
HW.y
+
X]
=
result[2]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
+
2
)
,
result[2]
)
;
dst_data[
(
4
*
Y
+
3
)
*
HW.y
+
X]
=
result[3]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
+
3
)
,
result[3]
)
;
}
}
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/transpose.cl
浏览文件 @
4cd94a44
#
define
FLT
float
#
define
FLT
float
#
define
FLT4
float4
#
define
FLT4
float4
#
define
READ_IMAGE
read_imagef
#
define
READ_IMAGE
read_imagef
#
define
WRITE_IMAGE
write_imagef
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__kernel
void
transpose
(
__read_only
image2d_t
src_data,
__
global
float4
*
dst_data,
int2
HW,
int2
C
)
{
__kernel
void
transpose
(
__read_only
image2d_t
src_data,
__
write_only
image2d_t
dst_data,
int2
HW,
int2
C
)
{
int
X
=
get_global_id
(
0
)
;
int
X
=
get_global_id
(
0
)
;
int
Y
=
get_global_id
(
1
)
;
int
Y
=
get_global_id
(
1
)
;
if
(
X
>=
HW.y
||
Y
>=
C.y
)
{
if
(
X
>=
HW.y
||
Y
>=
C.y
)
{
...
@@ -37,8 +38,8 @@ __kernel void transpose(__read_only image2d_t src_data, __global float4 *dst_dat
...
@@ -37,8 +38,8 @@ __kernel void transpose(__read_only image2d_t src_data, __global float4 *dst_dat
result[3].z
=
x2.w
;
result[3].z
=
x2.w
;
result[3].w
=
x3.w
;
result[3].w
=
x3.w
;
dst_data[4
*
Y
*
HW.y
+
X]
=
result[0]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
)
,
result[0]
)
;
dst_data[
(
4
*
Y
+
1
)
*
HW.y
+
X]
=
result[1]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
+
1
)
,
result[1]
)
;
dst_data[
(
4
*
Y
+
2
)
*
HW.y
+
X]
=
result[2]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
+
2
)
,
result[2]
)
;
dst_data[
(
4
*
Y
+
3
)
*
HW.y
+
X]
=
result[3]
;
WRITE_IMAGE
(
dst_data,
(
int2
)(
X,
4
*
Y
+
3
)
,
result[3]
)
;
}
}
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
浏览文件 @
4cd94a44
...
@@ -43,7 +43,7 @@ void ArithmeticOpenCLKernel::Image2dGetWorkGroupSize() {
...
@@ -43,7 +43,7 @@ void ArithmeticOpenCLKernel::Image2dGetWorkGroupSize() {
size_t
H
=
outputs_
[
0
]
->
Batch
()
*
outputs_
[
0
]
->
Height
();
size_t
H
=
outputs_
[
0
]
->
Batch
()
*
outputs_
[
0
]
->
Height
();
size_t
W
=
outputs_
[
0
]
->
Width
()
*
UP_DIV
(
outputs_
[
0
]
->
Channel
(),
C4NUM
);
size_t
W
=
outputs_
[
0
]
->
Width
()
*
UP_DIV
(
outputs_
[
0
]
->
Channel
(),
C4NUM
);
local_size_
=
{
16
,
16
};
local_size_
=
{
16
,
16
};
global_size_
=
{
H
,
W
};
global_size_
=
{
W
,
H
};
}
}
void
ArithmeticOpenCLKernel
::
BufferGetWorkGroupSize
()
{
void
ArithmeticOpenCLKernel
::
BufferGetWorkGroupSize
()
{
...
@@ -140,7 +140,7 @@ int ArithmeticOpenCLKernel::Run() {
...
@@ -140,7 +140,7 @@ int ArithmeticOpenCLKernel::Run() {
bias_
=
-
1
*
value
;
bias_
=
-
1
*
value
;
break
;
break
;
case
PrimitiveType_Div
:
case
PrimitiveType_Div
:
bias
_
=
1
/
value
;
weight
_
=
1
/
value
;
break
;
break
;
default:
default:
MS_LOG
(
ERROR
)
<<
"Error Operator type "
<<
opParameter
->
type_
;
MS_LOG
(
ERROR
)
<<
"Error Operator type "
<<
opParameter
->
type_
;
...
@@ -152,7 +152,7 @@ int ArithmeticOpenCLKernel::Run() {
...
@@ -152,7 +152,7 @@ int ArithmeticOpenCLKernel::Run() {
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
outputs_
[
0
]
->
Data
());
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
outputs_
[
0
]
->
Data
());
int
H
=
outputs_
[
0
]
->
Batch
()
*
outputs_
[
0
]
->
Height
();
int
H
=
outputs_
[
0
]
->
Batch
()
*
outputs_
[
0
]
->
Height
();
int
W
=
outputs_
[
0
]
->
Width
()
*
UP_DIV
(
outputs_
[
0
]
->
Channel
(),
C4NUM
);
int
W
=
outputs_
[
0
]
->
Width
()
*
UP_DIV
(
outputs_
[
0
]
->
Channel
(),
C4NUM
);
cl_int2
output_shape
{
H
,
W
};
cl_int2
output_shape
{
W
,
H
};
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
output_shape
);
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
output_shape
);
runtime_
->
RunKernel
(
kernel_
,
global_size_
,
local_size_
,
nullptr
);
runtime_
->
RunKernel
(
kernel_
,
global_size_
,
local_size_
,
nullptr
);
return
0
;
return
0
;
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
浏览文件 @
4cd94a44
...
@@ -67,6 +67,21 @@ int TransposeOpenCLKernel::Init() {
...
@@ -67,6 +67,21 @@ int TransposeOpenCLKernel::Init() {
int
TransposeOpenCLKernel
::
ReSize
()
{
return
0
;
}
int
TransposeOpenCLKernel
::
ReSize
()
{
return
0
;
}
int
TransposeOpenCLKernel
::
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
{
size_t
im_dst_x
,
im_dst_y
;
im_dst_x
=
UP_DIV
(
outputs_
[
0
]
->
Height
()
*
outputs_
[
0
]
->
Width
(),
C4NUM
);
im_dst_y
=
outputs_
[
0
]
->
Channel
();
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
return
RET_OK
;
}
int
TransposeOpenCLKernel
::
Run
()
{
int
TransposeOpenCLKernel
::
Run
()
{
MS_LOG
(
DEBUG
)
<<
this
->
Name
()
<<
" Running!"
;
MS_LOG
(
DEBUG
)
<<
this
->
Name
()
<<
" Running!"
;
std
::
vector
<
int
>
shapex
=
inputs_
[
0
]
->
shape
();
std
::
vector
<
int
>
shapex
=
inputs_
[
0
]
->
shape
();
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
浏览文件 @
4cd94a44
...
@@ -35,7 +35,7 @@ class TransposeOpenCLKernel : public OpenCLKernel {
...
@@ -35,7 +35,7 @@ class TransposeOpenCLKernel : public OpenCLKernel {
int
Init
()
override
;
int
Init
()
override
;
int
ReSize
()
override
;
int
ReSize
()
override
;
int
Run
()
override
;
int
Run
()
override
;
int
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
override
;
private:
private:
cl
::
Kernel
kernel_
;
cl
::
Kernel
kernel_
;
};
};
...
...
mindspore/lite/test/st/benchmark_test.cc
浏览文件 @
4cd94a44
...
@@ -50,6 +50,15 @@ auto status = RunBenchmark(5, argv);
...
@@ -50,6 +50,15 @@ auto status = RunBenchmark(5, argv);
ASSERT_EQ
(
status
,
RET_OK
);
ASSERT_EQ
(
status
,
RET_OK
);
}
}
TEST_F
(
BenchmarkTest
,
Test_MV2_GPU
)
{
const
char
*
argv
[]
=
{
"./benchmark"
,
"--modelPath=./hiai/mobilenet_v2.ms"
,
"--inDataPath=./hiai/mobilenet_v2_in.bin"
,
"--calibDataPath=./hiai/mobilenet_v2_out.bin"
,
"--device=GPU"
};
auto
status
=
RunBenchmark
(
5
,
argv
);
ASSERT_EQ
(
status
,
RET_OK
);
}
TEST_F
(
BenchmarkTest
,
TestHebing
)
{
TEST_F
(
BenchmarkTest
,
TestHebing
)
{
const
char
*
argv
[]
=
{
"./benchmark"
,
"--modelPath=./hiai/model_hebing_3branch.ms"
,
const
char
*
argv
[]
=
{
"./benchmark"
,
"--modelPath=./hiai/model_hebing_3branch.ms"
,
"--inDataPath=./hiai/model_hebing_3branch.bin"
,
"--inDataPath=./hiai/model_hebing_3branch.bin"
,
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
浏览文件 @
4cd94a44
...
@@ -66,6 +66,9 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
...
@@ -66,6 +66,9 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
size_n
=
size_n
>
100
?
100
:
size_n
;
size_n
=
size_n
>
100
?
100
:
size_n
;
for
(
int
i
=
0
;
i
<
size_n
;
i
++
)
{
for
(
int
i
=
0
;
i
<
size_n
;
i
++
)
{
std
::
cout
<<
output_data
[
i
]
<<
" "
;
std
::
cout
<<
output_data
[
i
]
<<
" "
;
if
((
i
+
1
)
%
c
==
0
)
{
std
::
cout
<<
std
::
endl
;
}
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录