Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
mindspore
提交
8e442ce7
M
mindspore
项目概览
MindSpore
/
mindspore
通知
35
Star
15
Fork
15
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
8e442ce7
编写于
9月 02, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
9月 02, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5635 [MS][LITE][GPU]fix bug in matmul and pooling
Merge pull request !5635 from chenzupeng/master-lite
上级
03093778
96744911
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
41 addition
and
33 deletion
+41
-33
mindspore/lite/src/runtime/kernel/opencl/cl/avg_pool2d.cl
mindspore/lite/src/runtime/kernel/opencl/cl/avg_pool2d.cl
+17
-14
mindspore/lite/src/runtime/kernel/opencl/cl/max_pool2d.cl
mindspore/lite/src/runtime/kernel/opencl/cl/max_pool2d.cl
+13
-10
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+3
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
+5
-5
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
+1
-0
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
+2
-0
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/avg_pool2d.cl
浏览文件 @
8e442ce7
__kernel
void
AvgPooling2d_BUF
(
__global
float4
*input,
__global
float4
*output,
const
int4
input_shape,
#
ifdef
cl_khr_fp16
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
endif
__kernel
void
AvgPooling2d_BUF
(
__global
FLT4
*input,
__global
FLT4
*output,
const
int4
input_shape,
const
int4
output_shape,
const
int2
stride,
const
int2
kernel_size,
const
int2
padding
)
{
//
axis
to
dst
tensor
coordinate
int
X
=
get_global_id
(
0
)
;
...
...
@@ -10,10 +13,10 @@ __kernel void AvgPooling2d_BUF(__global float4 *input, __global float4 *output,
return;
}
float4 r = (float
4)(0.0f);
float
window_size = 0.0f;
int xs = X * stride.x
+
padding.x;
int ys = Y * stride.y
+
padding.y;
FLT4 r = (FLT
4)(0.0f);
FLT
window_size = 0.0f;
int xs = X * stride.x
-
padding.x;
int ys = Y * stride.y
-
padding.y;
for (int kx = 0; kx < kernel_size.x; ++kx) {
int x_c = xs + kx;
...
...
@@ -21,11 +24,11 @@ __kernel void AvgPooling2d_BUF(__global float4 *input, __global float4 *output,
for (int ky = 0; ky < kernel_size.y; ++ky) {
int y_c = ys + ky;
bool outside = outside_x || y_c < 0 || y_c >= input_shape.y;
r += !outside ? input[(input_shape.y * x_c + y_c) * output_shape.w + Z] : (
float
4)(0.0f);
r += !outside ? input[(input_shape.y * x_c + y_c) * output_shape.w + Z] : (
FLT
4)(0.0f);
window_size += !outside ? 1.0f : 0.0f;
}
}
float4 result = convert_float
4(r / window_size);
FLT4 result = TO_FLT
4(r / window_size);
output[(output_shape.y * X + Y) * output_shape.w + Z] = result;
}
...
...
@@ -43,10 +46,10 @@ __kernel void AvgPooling2d_IMG(__read_only image2d_t input, __write_only image2d
return;
}
float4 r = (float
4)(0.0f);
float
window_size = 0.0f;
int xs = X * stride.x
+
padding.x;
int ys = Y * stride.y
+
padding.y;
FLT4 r = (FLT
4)(0.0f);
FLT
window_size = 0.0f;
int xs = X * stride.x
-
padding.x;
int ys = Y * stride.y
-
padding.y;
for (int ky = 0; ky < kernel_size.y; ++ky) {
int y_c = ys + ky;
...
...
@@ -54,10 +57,10 @@ __kernel void AvgPooling2d_IMG(__read_only image2d_t input, __write_only image2d
for (int kx = 0; kx < kernel_size.x; ++kx) {
int x_c = xs + kx;
bool outside = outside_y || x_c < 0 |
|
x_c
>=
input_shape.x
;
r
+=
read_imagef
(
input,
smp_zero,
(
int2
)(
y_c
*
input_shape.w
+
Z,
x_c
)
)
;
r
+=
!outside
?
READ_IMAGE
(
input,
smp_zero,
(
int2
)(
y_c
*
input_shape.w
+
Z,
x_c
))
:
(
float4
)(
0.0f
)
;
window_size
+=
!outside
?
1.0f
:
0.0f
;
}
}
float4
result
=
convert_float
4
(
r
/
window_size
)
;
write_imagef
(
output,
(
int2
)(
Y
*
output_shape.w
+
Z,
X
)
,
result
)
;
FLT4
result
=
TO_FLT
4
(
r
/
window_size
)
;
WRITE_IMAGE
(
output,
(
int2
)(
Y
*
output_shape.w
+
Z,
X
)
,
result
)
;
}
mindspore/lite/src/runtime/kernel/opencl/cl/max_pool2d.cl
浏览文件 @
8e442ce7
__kernel
void
MaxPooling2d_BUF
(
__global
float4
*input,
__global
float4
*output,
const
int4
input_shape,
#
ifdef
cl_khr_fp16
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
endif
__kernel
void
MaxPooling2d_BUF
(
__global
FLT4
*input,
__global
FLT4
*output,
const
int4
input_shape,
const
int4
output_shape,
const
int2
stride,
const
int2
kernel_size,
const
int2
padding
)
{
//
axis
to
dst
tensor
coordinate
int
X
=
get_global_id
(
0
)
;
...
...
@@ -10,9 +13,9 @@ __kernel void MaxPooling2d_BUF(__global float4 *input, __global float4 *output,
return;
}
float4 maximum = (float
4)(-10000.0f);
int xs = X * stride.x
+
padding.x;
int ys = Y * stride.y
+
padding.y;
FLT4 maximum = (FLT
4)(-10000.0f);
int xs = X * stride.x
-
padding.x;
int ys = Y * stride.y
-
padding.y;
for (int kx = 0; kx < kernel_size.x; ++kx) {
int x_c = xs + kx;
...
...
@@ -24,7 +27,7 @@ __kernel void MaxPooling2d_BUF(__global float4 *input, __global float4 *output,
if (y_c < 0 || y_c >= input_shape.y) {
continue;
}
float
4 src = input[(input_shape.y * x_c + y_c) * input_shape.w + Z];
FLT
4 src = input[(input_shape.y * x_c + y_c) * input_shape.w + Z];
maximum = max(src, maximum);
}
}
...
...
@@ -45,18 +48,18 @@ __kernel void MaxPooling2d_IMG(__read_only image2d_t input, __write_only image2d
return;
}
float4 maximum = (float
4)(-10000.0f);
int xs = X * stride.x
+
padding.x;
int ys = Y * stride.y
+
padding.y;
FLT4 maximum = (FLT
4)(-10000.0f);
int xs = X * stride.x
-
padding.x;
int ys = Y * stride.y
-
padding.y;
for (int ky = 0; ky < kernel_size.y; ++ky) {
int y_c = ys + ky;
if (y_c < 0 || y_c >= input_shape.y) continue;
for (int kx = 0; kx < kernel_size.x; ++kx) {
int x_c = xs + kx;
if (x_c < 0 |
|
x_c
>=
input_shape.x
)
continue
;
float4
src
=
read_imagef
(
input,
smp_none,
(
int2
)(
y_c
*
input_shape.w
+
Z,
x_c
))
;
FLT4
src
=
READ_IMAGE
(
input,
smp_none,
(
int2
)(
y_c
*
input_shape.w
+
Z,
x_c
))
;
maximum
=
max
(
src,
maximum
)
;
}
}
write_imagef
(
output,
(
int2
)(
Y
*
output_shape.w
+
Z,
X
)
,
maximum
)
;
WRITE_IMAGE
(
output,
(
int2
)(
Y
*
output_shape.w
+
Z,
X
)
,
maximum
)
;
}
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
浏览文件 @
8e442ce7
...
...
@@ -58,14 +58,13 @@ int MatMulOpenCLKernel::Init() {
sizeCO
=
{
co
,
UP_DIV
(
co
,
C4NUM
)};
PadWeight
();
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
in_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
if
(
out_tensors_
[
0
]
->
shape
().
size
()
==
2
)
{
out_ori_format_
=
schema
::
Format_NC
;
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NC4
);
in_ori_format_
=
schema
::
Format_NC
;
in_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NC4
);
}
else
{
in_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
}
MS_LOG
(
DEBUG
)
<<
kernel_name
<<
" Init Done!"
;
return
RET_OK
;
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
浏览文件 @
8e442ce7
...
...
@@ -60,7 +60,7 @@ int PoolingOpenCLKernel::Init() {
return
RET_INVALID_OP_NAME
;
}
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
#ifdef PROGRAM_WITH_IL
kernel_
=
ocl_runtime
->
GetKernelFromBinary
(
kernel_name
);
#else
...
...
@@ -96,11 +96,10 @@ int PoolingOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
size_t
im_dst_x
,
im_dst_y
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
()
*
CO4
;
im_dst_y
=
out_tensors_
[
0
]
->
Height
();
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
...
...
@@ -161,5 +160,6 @@ kernel::LiteKernel *OpenCLPooling2dKernelCreator(const std::vector<lite::tensor:
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Pooling
,
OpenCLPooling2dKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_Pooling
,
OpenCLPooling2dKernelCreator
)
}
// namespace kernel
}
// namespace mindspore
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
浏览文件 @
8e442ce7
...
...
@@ -44,6 +44,7 @@ class PoolingOpenCLKernel : public OpenCLKernel {
std
::
vector
<
size_t
>
InitGlobalSize
()
const
;
PoolingParameter
*
parameter_
;
cl
::
Kernel
kernel_
;
bool
enable_fp16_
{
false
};
};
}
// namespace mindspore::kernel
...
...
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
浏览文件 @
8e442ce7
...
...
@@ -50,6 +50,7 @@ class OpenCLKernel : public LiteKernel {
}
OpenCLMemType
GetMemType
()
{
return
out_mem_type_
;
}
void
SetMemType
(
OpenCLMemType
mem_type
)
{
out_mem_type_
=
mem_type
;
}
void
SetFormatType
(
schema
::
Format
format_type
)
{
op_format_
=
format_type
;
}
schema
::
Format
GetInOriFormat
()
{
return
in_ori_format_
;
}
schema
::
Format
GetOutOriFormat
()
{
return
out_ori_format_
;
}
...
...
@@ -57,6 +58,7 @@ class OpenCLKernel : public LiteKernel {
OpenCLMemType
out_mem_type_
{
OpenCLMemType
::
IMG
};
schema
::
Format
in_ori_format_
{
schema
::
Format_NHWC
};
schema
::
Format
out_ori_format_
{
schema
::
Format_NHWC4
};
schema
::
Format
op_format_
{
schema
::
Format_NC4HW4
};
};
}
// namespace mindspore::kernel
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录