magicwindyyd / mindspore (forked from MindSpore / mindspore)

Commit 89ca31a0
Authored on Aug 21, 2020 by chenzupeng
Parent: 11e670c5

fix bug in matmul testcase

Showing 21 changed files with 165 additions and 117 deletions (+165 -117)
.gitignore  (+3 -0)
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/caffe_prelu.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc  (+4 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc  (+2 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc  (+69 -66)
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h  (+3 -7)
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc  (+3 -1)
mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc  (+8 -2)
mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc  (+4 -2)
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc  (+3 -7)
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h  (+5 -3)
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc  (+16 -7)
mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc  (+2 -1)
mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc  (+14 -9)
mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc  (+8 -3)

.gitignore
@@ -98,3 +98,6 @@ mindspore/.commit_id
# lite test file
mindspore/lite/test/do_test/
# lite opencl compile file
*.cl.inc

mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
@@ -67,7 +67,9 @@ int ActivationOpenClKernel::Init() {
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << op_parameter_->name_ << " init Done!";
  return RET_OK;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
@@ -122,7 +122,9 @@ int ArithmeticOpenCLKernel::Init() {
  if (error_code != RET_OK) {
    return error_code;
  }
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  Image2dGetWorkGroupSize();
  return RET_OK;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
@@ -56,7 +56,9 @@ int BatchNormOpenCLKernel::Init() {
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  return RET_OK;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/caffe_prelu.cc
@@ -63,7 +63,9 @@ int CaffePReluOpenCLKernel::Init() {
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << program_name << " Init Done!";
  return RET_OK;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
@@ -83,7 +83,9 @@ int ConcatOpenCLKernel::Init() {
    ocl_runtime->LoadSource(program_name, source);
    ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
  }
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  return RET_OK;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
@@ -51,7 +51,9 @@ int Conv2dTransposeOpenCLKernel::Init() {
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
  PadWeight();
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
@@ -92,8 +92,11 @@ int ConvolutionOpenCLKernel::Init() {
  }
  this->InitBuffer();
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << "Convolution Init Done!";
  return RET_OK;
}
...

mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
@@ -42,7 +42,8 @@ int DepthwiseConv2dOpenCLKernel::Init() {
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  std::string kernel_name = "DepthwiseConv2d";
  auto in_format = in_tensors_[0]->GetFormat();
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_format;
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(in_format);
  if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
    MS_LOG(ERROR) << "input format(" << in_format << ") "
...

mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
@@ -44,26 +44,28 @@ int MatMulOpenCLKernel::Init() {
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
  auto weight_format = in_tensors_[1]->GetFormat();
  if (weight_format != schema::Format_NHWC) {
    MS_LOG(ERROR) << "weight format(" << weight_format << ") " << "format not support!";
    return 1;
  int ci, co;
  if (in_tensors_[1]->shape().size() == 2) {
    ci = in_tensors_[1]->shape()[1];
    co = in_tensors_[1]->shape()[0];
  } else {
    ci = in_tensors_[1]->shape()[3];
    co = in_tensors_[1]->shape()[0];
  }
  int ci = in_tensors_[1]->shape()[3];
  int co = in_tensors_[1]->shape()[0];
  sizeCI = {ci, UP_DIV(ci, 4)};
  sizeCO = {co, UP_DIV(co, 4)};
  auto allocator = ocl_runtime->GetAllocator();
  padWeight_ = reinterpret_cast<FLOAT_T *>(allocator->Malloc(sizeCI.s[1] * sizeCO.s[1] * 16 * sizeof(FLOAT_T)));
  padWeight_ = reinterpret_cast<FLOAT_T *>(allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true));
  bias_ = reinterpret_cast<FLOAT_T *>(allocator->Malloc(sizeCO.s[1] * 4 * sizeof(FLOAT_T)));
  bias_ = reinterpret_cast<FLOAT_T *>(allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true));
  sizeCI = {ci, UP_DIV(ci, C4NUM)};
  sizeCO = {co, UP_DIV(co, C4NUM)};
  PadWeight();
  allocator->UnmapBuffer(padWeight_);
  allocator->UnmapBuffer(bias_);
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  if (out_tensors_[0]->shape().size() == 2) {
    out_ori_format_ = schema::Format_NC;
    out_tensors_[0]->SetFormat(schema::Format_NC4);
    in_ori_format_ = schema::Format_NC;
    in_tensors_[0]->SetFormat(schema::Format_NC4);
  }
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return 0;
}
...
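An aside for readers of the hunk above, not part of the commit: the allocation sizes follow from packing channels into slices of four. The sketch below reproduces that arithmetic with stand-in helpers; UpDiv and kC4 are hypothetical names, and UP_DIV is assumed to be round-up integer division with C4NUM equal to 4, as is conventional in MindSpore Lite.

// Illustrative sketch only: how the padWeight_/bias_ allocation sizes in
// MatMulOpenCLKernel::Init() are derived under the assumptions stated above.
#include <cstdio>

constexpr int kC4 = 4;                                          // stands in for C4NUM
constexpr int UpDiv(int x, int y) { return (x + y - 1) / y; }   // stands in for UP_DIV

int main() {
  int ci = 1280, co = 1001;            // values used by the matmul unit test
  int ci_slices = UpDiv(ci, kC4);      // sizeCI.s[1]
  int co_slices = UpDiv(co, kC4);      // sizeCO.s[1]
  // padWeight_ holds ci_slices * co_slices tiles of 4x4 values.
  std::size_t weight_elems = static_cast<std::size_t>(ci_slices) * co_slices * kC4 * kC4;
  // bias_ is padded to a whole number of 4-element slices.
  std::size_t bias_elems = static_cast<std::size_t>(co_slices) * kC4;
  std::printf("weight elems: %zu, bias elems: %zu\n", weight_elems, bias_elems);
  return 0;
}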

@@ -71,16 +73,22 @@ int MatMulOpenCLKernel::Init() {
int MatMulOpenCLKernel::ReSize() { return 0; }

void MatMulOpenCLKernel::PadWeight() {
  auto origin_weight = reinterpret_cast<FLOAT_T *>(in_tensors_.at(kWeightIndex)->Data());
  auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
  padWeight_ = reinterpret_cast<FLOAT_t *>(allocator->Malloc(sizeCI.s[1] * sizeCO.s[1] * C4NUM * C4NUM * sizeof(FLOAT_t)));
  padWeight_ = reinterpret_cast<FLOAT_t *>(allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true));
  auto origin_weight = reinterpret_cast<FLOAT_t *>(in_tensors_.at(kWeightIndex)->Data());
  int divCI = sizeCI.s[1];
  int divCO = sizeCO.s[1];
  int co = sizeCO.s[0];
  int index = 0;
  for (int i = 0; i < divCI; ++i) {
    for (int j = 0; j < divCO; ++j) {
      for (int k = 0; k < 4; ++k) {
        for (int l = 0; l < 4; ++l) {
          int src_x = i * 4 + l;
          int src_y = j * 4 + k;
      for (int k = 0; k < C4NUM; ++k) {
        for (int l = 0; l < C4NUM; ++l) {
          int src_x = i * C4NUM + l;
          int src_y = j * C4NUM + k;
          if (src_x < sizeCI.s[0] && src_y < sizeCO.s[0]) {
            padWeight_[index++] = origin_weight[src_y * sizeCI.s[0] + src_x];
          } else {
...
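Read as plain CPU code, the repacking loop above turns a row-major [co][ci] weight matrix into (ci/4) x (co/4) tiles of 4x4 values, zero-filling positions past the matrix edge. The following standalone sketch mirrors that loop structure for illustration only; PackWeightTo4x4Tiles is a hypothetical helper, not the kernel's actual code path.

#include <cstddef>
#include <vector>

// Sketch of the repacking done by PadWeight(): pack a row-major [co][ci]
// weight matrix into 4x4 tiles, zero-padding out-of-range elements.
std::vector<float> PackWeightTo4x4Tiles(const std::vector<float> &weight, int ci, int co) {
  const int c4 = 4;
  const int div_ci = (ci + c4 - 1) / c4;
  const int div_co = (co + c4 - 1) / c4;
  std::vector<float> packed(static_cast<std::size_t>(div_ci) * div_co * c4 * c4, 0.0f);
  std::size_t index = 0;
  for (int i = 0; i < div_ci; ++i) {
    for (int j = 0; j < div_co; ++j) {
      for (int k = 0; k < c4; ++k) {
        for (int l = 0; l < c4; ++l) {
          int src_x = i * c4 + l;  // input-channel index
          int src_y = j * c4 + k;  // output-channel index
          if (src_x < ci && src_y < co) {
            packed[index] = weight[static_cast<std::size_t>(src_y) * ci + src_x];
          }
          ++index;
        }
      }
    }
  }
  return packed;
}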

@@ -90,60 +98,55 @@ void MatMulOpenCLKernel::PadWeight() {
      }
    }
  }
  if (hasBias_) {
    memcpy(bias_, in_tensors_[2]->Data(), sizeof(FLOAT_T) * sizeCO.s[0]);
    for (int i = sizeCO.s[0]; i < sizeCO.s[1] * 4; i++) {
      bias_[i] = 0;
    }
  } else {
    for (int i = 0; i < sizeCO.s[1] * 4; i++) {
      bias_[i] = 0;
    }
  size_t im_dst_x, im_dst_y;
  im_dst_x = divCO;
  im_dst_y = 1;
#ifdef ENABLE_FP16
  size_t img_dtype = CL_HALF_FLOAT;
#else
  size_t img_dtype = CL_FLOAT;
#endif
  std::vector<size_t> img_size{im_dst_x, im_dst_y, img_dtype};
  bias_ = reinterpret_cast<FLOAT_t *>(allocator->Malloc(im_dst_x * im_dst_y * C4NUM * sizeof(FLOAT_t), img_size));
  bias_ = reinterpret_cast<FLOAT_t *>(allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true));
  memset(bias_, 0x00, divCO * C4NUM * sizeof(FLOAT_t));
  if (in_tensors_.size() >= 3) {
    memcpy(bias_, in_tensors_[2]->Data(), co * sizeof(FLOAT_t));
  }
  allocator->UnmapBuffer(bias_);
}

int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
  size_t im_dst_x, im_dst_y;
  im_dst_x = sizeCO.s[1];
  im_dst_y = 1;
#ifdef ENABLE_FP16
  size_t img_dtype = CL_HALF_FLOAT;
#else
  size_t img_dtype = CL_FLOAT;
#endif
  img_size->clear();
  std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
  *img_size = vec;
  return RET_OK;
}

int MatMulOpenCLKernel::Run() {
  MS_LOG(DEBUG) << this->name() << " Running!";
  std::vector<int> shapex = in_tensors_[0]->shape();
  int n = shapex[0];
  if (n > 1) {
    MS_LOG(ERROR) << "MatMul n > 1 not supported!";
    return 1;
  }
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  // local size should less than MAX_GROUP_SIZE
  std::vector<size_t> local = {64, 4};
  std::vector<size_t> global = {UP_ROUND(sizeCO.s[1], local[0]), 4};
  cl::ImageFormat image_format;
  {
    image_format.image_channel_order = CL_RGBA;
#ifdef ENABLE_FP16
    image_format.image_channel_data_type = CL_HALF_FLOAT;
#else
    image_format.image_channel_data_type = CL_FLOAT;
#endif
  }
  cl_int in_error_code, in_error_code_weight, in_error_code_bias, out_error_code;
  cl::Image2D img_input(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format,
                        sizeCI.s[1], 1, 0, in_tensors_[0]->Data(), &in_error_code);
  cl::Image2D img_bias(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format,
                       sizeCO.s[1], 1, 0, bias_, &in_error_code_bias);
  cl::Image2D img_out(*ocl_runtime->Context(), CL_MEM_WRITE_ONLY, image_format, sizeCO.s[1], 1, 0, nullptr,
                      &out_error_code);
  ocl_runtime->SetKernelArg(kernel_, 0, img_input);
  ocl_runtime->SetKernelArg(kernel_, 1, padWeight_);
  ocl_runtime->SetKernelArg(kernel_, 2, img_bias);
  ocl_runtime->SetKernelArg(kernel_, 3, img_out);
  ocl_runtime->SetKernelArg(kernel_, 4, sizeCI);
  ocl_runtime->SetKernelArg(kernel_, 5, sizeCO);
  ocl_runtime->SetKernelArg(kernel_, 6, hasBias_ ? 1 : 0);
  int arg_count = 0;
  ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->Data());
  ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_);
  ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_);
  ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->Data());
  ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCI);
  ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCO);
  ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
  ocl_runtime->RunKernel(kernel_, global, local, nullptr);
  auto origin = cl::array<cl::size_type, 3U>{0, 0, 0};
  auto region = cl::array<cl::size_type, 3U>{(size_t)(sizeCO.s[1]), 1, 1};
  ocl_runtime->GetDefaultCommandQueue()->enqueueReadImage(img_out, CL_TRUE, origin, region, 0, 0,
                                                          out_tensors_[0]->Data());
  return 0;
}
...
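A brief note on the work sizing in Run() above: the first global dimension is the number of 4-channel output slices rounded up to the local size. The sketch below restates that relation with hypothetical helpers; UpRound stands in for UP_ROUND, which is assumed to round its first argument up to a multiple of the second.

#include <cstddef>
#include <vector>

// Illustrative stand-in for UP_ROUND(x, y): round x up to a multiple of y.
inline std::size_t UpRound(std::size_t x, std::size_t y) { return ((x + y - 1) / y) * y; }

// Mirrors the {global, local} sizing used by MatMulOpenCLKernel::Run():
// local = {64, 4}, global = {UP_ROUND(co_slices, 64), 4}.
std::vector<std::size_t> MatMulGlobalSize(std::size_t co_slices) {
  const std::vector<std::size_t> local = {64, 4};
  return {UpRound(co_slices, local[0]), local[1]};
}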

mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
@@ -23,11 +23,6 @@
#include "src/runtime/kernel/arm/nnacl/conv_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"
#ifdef ENABLE_FP16
using FLOAT_T = float16_t;
#else
using FLOAT_T = float;
#endif

namespace mindspore::kernel {
...

@@ -44,11 +39,12 @@ class MatMulOpenCLKernel : public OpenCLKernel {
  int ReSize() override;
  int Run() override;
  void PadWeight();
  int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;

 private:
  cl::Kernel kernel_;
  FLOAT_T *padWeight_;
  FLOAT_T *bias_;
  FLOAT_t *padWeight_;
  FLOAT_t *bias_;
  bool hasBias_ = false;
  cl_int2 sizeCI;
  cl_int2 sizeCO;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
@@ -73,7 +73,9 @@ int PoolingOpenCLKernel::Init() {
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
...

mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
@@ -46,7 +46,9 @@ int PReluOpenCLKernel::Init() {
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << program_name << " init Done!";
  return RET_OK;
...

mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
@@ -43,8 +43,14 @@ int ReshapeOpenCLKernel::Init() {
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC);
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  if (out_tensors_[0]->shape().size() == 2) {
    out_ori_format_ = schema::Format_NC;
    out_tensors_[0]->SetFormat(schema::Format_NC4);
  }
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
}
...

mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
@@ -123,10 +123,12 @@ int SoftmaxOpenCLKernel::Init() {
  runtime_->LoadSource(program_name, source);
  runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
  ori_format_ = out_tensors_[0]->GetFormat();
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  if (!is_image_out_) {
    ori_format_ = schema::Format_NC;
    out_ori_format_ = schema::Format_NC;
    out_tensors_[0]->SetFormat(schema::Format_NC);
  }
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
...

mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
@@ -49,17 +49,13 @@ int TransposeOpenCLKernel::Init() {
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
  auto input_format = in_tensors_[0]->GetFormat();
  if (input_format != schema::Format_NHWC4) {
    MS_LOG(ERROR) << "input format(" << input_format << ") " << "format not support!";
    return RET_ERROR;
  }
  if ((in_tensors_[0]->Height() * in_tensors_[0]->Width()) % 4 != 0) {
    MS_LOG(ERROR) << "input H * W % 4 != 0 not support!";
    return RET_ERROR;
  }
  ori_format_ = schema::Format_NCHW;
  in_ori_format_ = in_tensors_[0]->GetFormat();
  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
  out_ori_format_ = schema::Format_NCHW;
  out_tensors_[0]->SetFormat(schema::Format_NCHW);
  if (!is_image_out_) {
    out_mem_type_ = OpenCLMemType::BUF;
...

mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
@@ -35,7 +35,7 @@ class OpenCLKernel : public LiteKernel {
 public:
  explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                        const std::vector<lite::tensor::Tensor *> &outputs)
      : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}

  virtual int Init() { return -1; }
  virtual int Prepare() { return -1; }
...

@@ -49,11 +49,13 @@ class OpenCLKernel : public LiteKernel {
  }
  OpenCLMemType GetMemType() { return out_mem_type_; }
  void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; }
  schema::Format GetOriFormat() { return ori_format_; }
  schema::Format GetInOriFormat() { return in_ori_format_; }
  schema::Format GetOutOriFormat() { return out_ori_format_; }

 protected:
  OpenCLMemType out_mem_type_{OpenCLMemType::IMG};
  schema::Format ori_format_{schema::Format_NHWC4};
  schema::Format in_ori_format_{schema::Format_NHWC};
  schema::Format out_ori_format_{schema::Format_NHWC4};
};
}  // namespace mindspore::kernel
...

mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
@@ -38,10 +38,10 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
  for (auto &iv : in_kernels) {
    for (auto &jv : iv) {
      OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(jv);
      schema::Format ori_format = cur_opencl_op->GetOriFormat();
      schema::Format out_ori_format = cur_opencl_op->GetOutOriFormat();
      auto tens = cur_opencl_op->out_tensors();
      if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() &&
          tens[0]->GetFormat() == ori_format) {
          tens[0]->GetFormat() == out_ori_format) {
        continue;
      }
      if (mem_type == OpenCLMemType::IMG) {
...

@@ -53,14 +53,16 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
  }
  for (size_t i = 0; i < in_tensors.size(); ++i) {
    OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i][0]);
    schema::Format ori_format = cur_opencl_op->GetOriFormat();
    schema::Format out_ori_format = cur_opencl_op->GetOutOriFormat();
    schema::Format in_ori_format = cur_opencl_op->GetInOriFormat();
    if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() &&
        in_tensors[i]->GetFormat() == ori_format) {
        in_tensors[i]->GetFormat() == out_ori_format) {
      continue;
    }
    auto dst_format = (mem_type == OpenCLMemType::IMG) ? in_kernels[i][0]->out_tensors()[0]->GetFormat() : ori_format;
    auto dst_format = (mem_type == OpenCLMemType::IMG) ? in_kernels[i][0]->in_tensors()[0]->GetFormat() : out_ori_format;
    auto src_format = (mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i][0]->out_tensors()[0]->GetFormat();
                      (mem_type == OpenCLMemType::IMG) ? in_ori_format : in_kernels[i][0]->out_tensors()[0]->GetFormat();
    lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor();
    MS_ASSERT(new_tensor);
    if (new_tensor == nullptr) {
...

@@ -80,7 +82,14 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
      std::vector<int> dst_shape{shape[0], shape[2], shape[3], shape[1]};
      new_tensor->set_shape(shape);
    }
    new_tensor->SetFormat(in_kernels[i][0]->out_tensors()[0]->GetFormat());
    if (mem_type == OpenCLMemType::IMG) {
      new_tensor->SetFormat(dst_format);
      in_tensors[i]->SetFormat(src_format);
    } else {
      new_tensor->SetFormat(src_format);
      in_tensors[i]->SetFormat(dst_format);
    }
    out_tensors->emplace_back(new_tensor);
#ifdef ENABLE_FP16
    KernelKey desc{kGPU, kNumberTypeFloat16, schema::PrimitiveType_ToFormat};
...

mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc
@@ -161,7 +161,8 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
  // compare
  CompareOutputData(output_data, correct_data, oh * ow * co, 0.00001);
  inputs[0]->SetData(nullptr);
  outputs[0]->SetData(nullptr);
  MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed";
  lite::opencl::OpenCLRuntime::DeleteInstance();
}
...

mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
@@ -31,6 +31,7 @@ class TestMatMulOpenCL : public mindspore::CommonTest {
TEST_F(TestMatMulOpenCL, MatMulFp32) {
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  ocl_runtime->Init();
  auto allocator = ocl_runtime->GetAllocator();
  size_t input_size;
  int ci = 1280;
  int co = 1001;
...

@@ -47,16 +48,16 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
    MS_LOG(ERROR) << "weight_data load error.";
    return;
  }
  std::vector<int> input_shape = {1, 1, 1, ci};
  auto tensor_x_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), input_shape);
  std::vector<int> input_shape = {1, ci};
  auto tensor_x_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), input_shape, schema::Format_NC);
  auto tensor_x = tensor_x_ptr.get();
  if (tensor_x == nullptr) {
    MS_LOG(ERROR) << "tensor_x create error.";
    return;
  }
  tensor_x->SetData(input_data);
  std::vector<int> w_shape = {co, 1, 1, ci};
  std::vector<int> w_shape = {co, ci};
  auto tensor_w_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), w_shape);
  auto tensor_w = tensor_w_ptr.get();
  if (tensor_w == nullptr) {
...

@@ -65,8 +66,9 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
  }
  tensor_w->SetData(weight_data);
  std::vector<int> out_shape = {1, 1, 1, co};
  auto tensor_out_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), out_shape);
  std::vector<int> out_shape = {1, co};
  auto tensor_out_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), out_shape, schema::Format_NC);
  auto tensor_out = tensor_out_ptr.get();
  if (tensor_out == nullptr) {
    MS_LOG(ERROR) << "tensor_out create error.";
...

@@ -81,6 +83,7 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
    return;
  }
  arith_kernel->Init();
  inputs[0]->MallocData(allocator);
  std::vector<kernel::LiteKernel *> kernels{arith_kernel};
...

@@ -92,6 +95,7 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
    return;
  }
  pGraph->Init();
  memcpy(inputs[0]->Data(), input_data, input_size);
  pGraph->Run();
  size_t output_size;
...

@@ -108,9 +112,10 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
  std::cout << std::endl;
  // compare
  CompareOutputData(output_data, correct_data, co, 0.00001);
  MS_LOG(INFO) << "TestMatMulFp32 passed";
  CompareOutputData(output_data, correct_data, co, 0.0001);
  tensor_x->SetData(nullptr);
  tensor_out->SetData(nullptr);
  lite::opencl::OpenCLRuntime::DeleteInstance();
  MS_LOG(INFO) << "TestMatMulFp32 passed";
}
}  // namespace mindspore

mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
@@ -44,14 +44,15 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
  }
  std::vector<int> input_shape = {1, h, w, c};
  auto tensor_x_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), input_shape, schema::Format_NHWC4);
                      std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), input_shape, schema::Format_NHWC);
  auto tensor_x = tensor_x_ptr.get();
  if (tensor_x == nullptr) {
    MS_LOG(ERROR) << "tensor_x create error.";
    return;
  }
  std::vector<int> out_shape = {1, c, h, w};
  auto tensor_out_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), out_shape);
  auto tensor_out_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), out_shape, schema::Format_NCHW);
  auto tensor_out = tensor_out_ptr.get();
  if (tensor_out == nullptr) {
    MS_LOG(ERROR) << "tensor_out create error.";
...

@@ -102,7 +103,11 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
  // compare
  CompareOutputData(output_data, correct_data, h * w * c, 0.00001);
  MS_LOG(INFO) << "Test TransposeFp32 passed";
  inputs[0]->SetData(nullptr);
  outputs[0]->SetData(nullptr);
  lite::opencl::OpenCLRuntime::DeleteInstance();
  MS_LOG(INFO) << "Test TransposeFp32 passed";
}
}  // namespace mindspore