Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
8df0d2fd
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8df0d2fd
编写于
9月 04, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
9月 04, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5759 [MS][LITE][DDevelop] concat ops support nc4hw4 format
Merge pull request !5759 from pengyongrong/op_format_toNC4HW4
上级
82310bb6
20855004
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
51 additions
and
16 deletions
+51
-16
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
+28
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
+19
-8
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
...lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
+1
-1
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
...re/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
+3
-3
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
浏览文件 @
8df0d2fd
...
...
@@ -2,10 +2,10 @@
#
define
INT4
int4
#
define
INT2
int2
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
batch_normalization
(
__read_only
image2d_t
input,
__read_only
image2d_t
scale,
__read_only
image2d_t
offset,
__read_only
image2d_t
mean,
__read_only
image2d_t
variance,
__write_only
image2d_t
output,
const
INT4
input_shape
,
float
epsilon
)
{
__kernel
void
Batch_normalization_NHWC4
(
__read_only
image2d_t
input,
__read_only
image2d_t
scale,
__read_only
image2d_t
offset,
__read_only
image2d_t
mean,
__read_only
image2d_t
variance,
__write_only
image2d_t
output
,
const
INT4
input_shape,
float
epsilon
)
{
int
X
=
get_global_id
(
0
)
; // H
int
Y
=
get_global_id
(
1
)
; // W
int
Z
=
get_global_id
(
2
)
; // C/4
...
...
@@ -25,3 +25,27 @@ __kernel void batch_normalization(__read_only image2d_t input, __read_only image
result.w = result_scale.w * ((result.w - result_mean.w) / sqrt(result_var.w + epsilon)) + result_offset.w;
WRITE_IMAGE(output, (int2)((Y)*input_shape.w + Z, (X)), result);
}
// Batch normalization over an image addressed as (Y, Z * input_shape.y + X),
// i.e. the layout the kernel name calls NC4HW4. Each work-item reads one FLT4
// texel, applies scale * ((x - mean) / sqrt(variance + epsilon)) + offset
// per component, and writes the result to the same coordinate in `output`.
//
// input_shape.y / .z / .w are the upper bounds for global ids 0 / 1 / 2.
// scale, offset, mean and variance are each sampled at (0, Z).
// NOTE(review): FLT4, READ_IMAGE and WRITE_IMAGE are not defined in this view;
// presumably they are supplied as build-time macros - confirm.
__kernel void Batch_normalization_NC4HW4(__read_only image2d_t input, __read_only image2d_t scale,
                                         __read_only image2d_t offset, __read_only image2d_t mean,
                                         __read_only image2d_t variance, __write_only image2d_t output,
                                         const INT4 input_shape, float epsilon) {
  int X = get_global_id(0);  // H
  int Y = get_global_id(1);  // W
  int Z = get_global_id(2);  // C/4
  // Work-items that fall outside the tensor extent have nothing to do.
  if (X >= input_shape.y || Y >= input_shape.z || Z >= input_shape.w) {
    return;
  }
  // Column is Y; rows are grouped per channel slice: row = Z * input_shape.y + X.
  FLT4 result = READ_IMAGE(input, smp_none, (int2)((Y), (Z * input_shape.y + X)));

  // Per-slice parameters live in column 0, row Z of their respective images.
  FLT4 result_mean = READ_IMAGE(mean, smp_none, (int2)((0), (Z)));
  FLT4 result_var = READ_IMAGE(variance, smp_none, (int2)((0), (Z)));
  FLT4 result_scale = READ_IMAGE(scale, smp_none, (int2)((0), (Z)));
  FLT4 result_offset = READ_IMAGE(offset, smp_none, (int2)((0), (Z)));

  // Kept component-wise so the scalar `epsilon` is combined with each lane
  // exactly as in the original expression.
  result.x = result_scale.x * ((result.x - result_mean.x) / sqrt(result_var.x + epsilon)) + result_offset.x;
  result.y = result_scale.y * ((result.y - result_mean.y) / sqrt(result_var.y + epsilon)) + result_offset.y;
  result.z = result_scale.z * ((result.z - result_mean.z) / sqrt(result_var.z + epsilon)) + result_offset.z;
  result.w = result_scale.w * ((result.w - result_mean.w) / sqrt(result_var.w + epsilon)) + result_offset.w;

  WRITE_IMAGE(output, (int2)((Y), (Z * input_shape.y + X)), result);
}
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
浏览文件 @
8df0d2fd
...
...
@@ -14,7 +14,6 @@
* limitations under the License.
*/
#include <cstring>
#include <string>
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
...
...
@@ -35,7 +34,7 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_siz
im_dst_x
=
out_tensors_
[
0
]
->
Width
()
*
CO4
;
im_dst_y
=
out_tensors_
[
0
]
->
Height
();
}
else
{
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_y
=
out_tensors_
[
0
]
->
Batch
()
*
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
size_t
img_dtype
=
CL_FLOAT
;
...
...
@@ -50,17 +49,29 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_siz
return
RET_OK
;
}
int
BatchNormOpenCLKernel
::
Init
()
{
auto
in_format
=
op_format_
;
if
(
in_format
!=
schema
::
Format_NHWC4
&&
in_format
!=
schema
::
Format_NC4HW4
)
{
MS_LOG
(
ERROR
)
<<
"input format("
<<
in_format
<<
") "
<<
"format not support!"
;
return
RET_ERROR
;
}
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
in_tensors_
[
0
]
->
SetFormat
(
op_format_
);
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
out_tensors_
[
0
]
->
SetFormat
(
op_format_
);
std
::
string
kernel_name
=
"Batch_normalization"
;
if
(
in_format
==
schema
::
Format_NC4HW4
)
{
kernel_name
+=
"_NC4HW4"
;
}
else
if
(
in_format
==
schema
::
Format_NHWC4
)
{
kernel_name
+=
"_NHWC4"
;
}
std
::
set
<
std
::
string
>
build_options
;
std
::
string
source
=
batchnorm_source
;
std
::
string
program_name
=
"batch_normalization"
;
std
::
string
kernel_name
=
"batch_normalization"
;
std
::
string
program_name
=
"Batch_normalization"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
LoadSource
(
program_name
,
source
);
ocl_runtime
->
BuildKernel
(
kernel_
,
program_name
,
kernel_name
,
build_options
);
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
in_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
return
RET_OK
;
}
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
浏览文件 @
8df0d2fd
...
...
@@ -197,7 +197,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
return
;
}
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC
4
,
tensor_type
);
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
" init tensor failed "
;
delete
tensor_data
;
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
浏览文件 @
8df0d2fd
...
...
@@ -180,8 +180,8 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
MS_LOG
(
INFO
)
<<
" init tensors "
;
constexpr
int
INPUT_NUM
=
3
;
std
::
array
<
std
::
vector
<
int
>
,
INPUT_NUM
>
input_shapes
=
{
std
::
vector
<
int
>
{
1
,
2
,
4
,
8
},
std
::
vector
<
int
>
{
1
,
2
,
4
,
8
},
std
::
vector
<
int
>
{
1
,
2
,
4
,
8
}};
std
::
vector
<
int
>
output_shape
=
{
3
,
2
,
4
,
8
};
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
}};
std
::
vector
<
int
>
output_shape
=
{
1
,
48
,
256
,
80
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
;
...
...
@@ -217,7 +217,7 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
}
return
;
}
param
->
axis_
=
0
;
param
->
axis_
=
1
;
auto
*
concat_kernel
=
new
(
std
::
nothrow
)
kernel
::
ConcatOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
concat_kernel
==
nullptr
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录