Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
1c1ee660
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1c1ee660
编写于
8月 17, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 17, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4603 [MS][LITE][Develop]add new ops named batchnorm for opencl(GPU)
Merge pull request !4603 from pengyongrong/batchnorm
上级
4975b4d5
a9e0cf68
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
409 addition
and
20 deletion
+409
-20
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/batchnorm.cl
...spore/lite/src/runtime/kernel/opencl/cl/fp32/batchnorm.cl
+27
-0
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/to_format.cl
...spore/lite/src/runtime/kernel/opencl/cl/fp32/to_format.cl
+2
-2
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
+148
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h
+49
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
+6
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
+9
-9
mindspore/lite/src/scheduler.cc
mindspore/lite/src/scheduler.cc
+31
-5
mindspore/lite/src/scheduler.h
mindspore/lite/src/scheduler.h
+1
-0
mindspore/lite/test/CMakeLists.txt
mindspore/lite/test/CMakeLists.txt
+2
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
...lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
+134
-0
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/batchnorm.cl
0 → 100644
浏览文件 @
1c1ee660
#
define
FLT4
float4
#
define
INT4
int4
#
define
INT2
int2
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
batch_normalization
(
__read_only
image2d_t
input,
__read_only
image2d_t
scale,
__read_only
image2d_t
offset,
__read_only
image2d_t
mean,
__read_only
image2d_t
variance,
__write_only
image2d_t
output,
const
INT4
input_shape,
float
epsilon
)
{
int
X
=
get_global_id
(
0
)
; // H
int
Y
=
get_global_id
(
1
)
; // W
int
Z
=
get_global_id
(
2
)
; // C/4
if
(
X
>=
input_shape.y
|
| Y >= input_shape.z |
|
Z
>=
input_shape.w
)
{
return
;
}
FLT4
result
=
read_imagef
(
input,
smp_none,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
)))
;
FLT4
result_mean
=
read_imagef
(
mean,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_var
=
read_imagef
(
variance,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_scale
=
read_imagef
(
scale,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_offset
=
read_imagef
(
offset,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
result.x
=
result_scale.x
*
((
result.x
-
result_mean.x
)
/
sqrt
(
result_var.x
+
epsilon
))
+
result_offset.x
;
result.y
=
result_scale.y
*
((
result.y
-
result_mean.y
)
/
sqrt
(
result_var.y
+
epsilon
))
+
result_offset.y
;
result.z
=
result_scale.z
*
((
result.z
-
result_mean.z
)
/
sqrt
(
result_var.z
+
epsilon
))
+
result_offset.z
;
result.w
=
result_scale.w
*
((
result.w
-
result_mean.w
)
/
sqrt
(
result_var.w
+
epsilon
))
+
result_offset.w
;
write_imagef
(
output,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
))
,
result
)
;
}
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/to_format.cl
浏览文件 @
1c1ee660
...
@@ -63,7 +63,7 @@ __kernel void to_format_NHWC4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only
...
@@ -63,7 +63,7 @@ __kernel void to_format_NHWC4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only
if (X >= size.x || Y >= size.y || Z >= size.z) {
if (X >= size.x || Y >= size.y || Z >= size.z) {
return;
return;
}
}
// WRITE_IMAGE(dst_data, (int2)(Y * size.z + Z, X), READ_IMAGE(src_data, smp_zero, (int2)(Y * size.z + Z, X))
);
WRITE_IMAGE(dst_data, (int2)(Y * size.z + Z, X), src_data[(X * size.y + Y) * size.z + Z]
);
}
}
__kernel void to_format_NC4HW4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
__kernel void to_format_NC4HW4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
int4 shape) {
int4 shape) {
...
@@ -231,5 +231,5 @@ __kernel void to_format_NHWC4_to_NHWC4_BUF(__read_only image2d_t src_data, __glo
...
@@ -231,5 +231,5 @@ __kernel void to_format_NHWC4_to_NHWC4_BUF(__read_only image2d_t src_data, __glo
if (X >= size.x || Y >= size.y |
|
Z
>=
size.z
)
{
if (X >= size.x || Y >= size.y |
|
Z
>=
size.z
)
{
return
;
return
;
}
}
dst_data[
(
Y
*
size.z
+
Z
)
*
size.x
+
X
]
=
READ_IMAGE
(
src_data,
smp_zero,
(
int2
)(
Y
*
size.z
+
Z,
X
))
;
dst_data[
(
X
*
size.y
+
Y
)
*
size.z
+
Z
]
=
READ_IMAGE
(
src_data,
smp_zero,
(
int2
)(
Y
*
size.z
+
Z,
X
))
;
}
}
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
0 → 100644
浏览文件 @
1c1ee660
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstring>
#include <string>
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/batchnorm.h"
#include "src/runtime/kernel/opencl/cl/fp32/batchnorm.cl.inc"
using
mindspore
::
kernel
::
KERNEL_ARCH
::
kGPU
;
using
mindspore
::
lite
::
KernelRegistrar
;
using
mindspore
::
schema
::
PrimitiveType_BatchNorm
;
namespace
mindspore
::
kernel
{
int
BatchNormOpenCLKernel
::
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
{
size_t
CO4
=
UP_DIV
(
out_tensors_
[
0
]
->
Channel
(),
C4NUM
);
size_t
im_dst_x
,
im_dst_y
;
if
(
in_tensors_
[
0
]
->
GetFormat
()
==
schema
::
Format_NHWC4
)
{
im_dst_x
=
out_tensors_
[
0
]
->
Width
()
*
CO4
;
im_dst_y
=
out_tensors_
[
0
]
->
Height
();
}
else
{
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
return
RET_OK
;
}
int
BatchNormOpenCLKernel
::
Init
()
{
std
::
set
<
std
::
string
>
build_options
;
std
::
string
source
=
batchnorm_source_fp32
;
std
::
string
program_name
=
"batch_normalization"
;
std
::
string
kernel_name
=
"batch_normalization"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
LoadSource
(
program_name
,
source
);
ocl_runtime
->
BuildKernel
(
kernel_
,
program_name
,
kernel_name
,
build_options
);
ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
return
RET_OK
;
}
int
BatchNormOpenCLKernel
::
ReSize
()
{
return
RET_OK
;
}
int
BatchnormGetBiggestDividerWithPriority
(
int
number
,
int
max_divider
)
{
if
(
number
%
8
==
0
&&
8
<=
max_divider
)
{
return
number
/
8
;
}
if
(
number
%
4
==
0
&&
4
<=
max_divider
)
{
return
number
/
4
;
}
if
(
number
%
2
==
0
&&
2
<=
max_divider
)
{
return
number
/
2
;
}
for
(
int
i
=
max_divider
;
i
!=
0
;
i
--
)
{
if
(
number
%
i
==
0
)
{
return
i
;
}
}
return
RET_OK
;
}
void
BatchNormGetWorkGroup
(
const
std
::
vector
<
size_t
>
&
global
,
std
::
vector
<
size_t
>
*
local
,
int
max_size
)
{
const
int
max_divider
=
8
;
const
int
max_x
=
4
,
max_y
=
8
;
int
x
=
std
::
min
(
BatchnormGetBiggestDividerWithPriority
(
global
[
0
],
max_divider
),
max_x
);
int
yz
=
max_size
/
x
;
int
y
=
std
::
min
(
std
::
min
(
BatchnormGetBiggestDividerWithPriority
(
global
[
1
],
max_divider
),
yz
),
max_y
);
int
z
=
std
::
min
(
yz
/
y
,
static_cast
<
int
>
(
UP_DIV
(
global
[
2
],
2
)));
local
->
clear
();
local
->
push_back
(
x
);
local
->
push_back
(
y
);
local
->
push_back
(
z
);
}
int
BatchNormOpenCLKernel
::
Run
()
{
MS_LOG
(
DEBUG
)
<<
this
->
name
()
<<
" Running!"
;
auto
param
=
reinterpret_cast
<
BatchNormParameter
*>
(
this
->
op_parameter_
);
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
input0_shape
=
in_tensors_
[
0
]
->
shape
();
auto
output_shape
=
out_tensors_
[
0
]
->
shape
();
cl_int4
input_shape_
=
{
input0_shape
[
0
],
input0_shape
[
1
],
input0_shape
[
2
],
UP_DIV
(
input0_shape
[
3
],
C4NUM
)};
uint32_t
OH
=
output_shape
[
1
];
uint32_t
OW
=
output_shape
[
2
];
uint32_t
OC
=
UP_DIV
(
output_shape
[
3
],
C4NUM
);
const
std
::
vector
<
size_t
>
&
max_global
=
ocl_runtime
->
GetWorkItemSize
();
std
::
vector
<
size_t
>
local
=
{
1
,
1
,
1
};
// init local
std
::
vector
<
size_t
>
global
=
{
OH
,
OW
,
OC
};
BatchNormGetWorkGroup
(
global
,
&
local
,
max_global
[
0
]);
int
arg_cn
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
in_tensors_
[
0
]
->
Data
());
// input tensor
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
in_tensors_
[
1
]
->
Data
());
// scale
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
in_tensors_
[
2
]
->
Data
());
// offest
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
in_tensors_
[
3
]
->
Data
());
// mean
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
in_tensors_
[
4
]
->
Data
());
// variance
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
out_tensors_
[
0
]
->
Data
());
// out tensor
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
input_shape_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
param
->
epsilon_
);
ocl_runtime
->
RunKernel
(
kernel_
,
global
,
local
,
nullptr
);
return
RET_OK
;
}
// namespace mindspore::kernel
kernel
::
LiteKernel
*
OpenCLBatchnormKernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
OpParameter
*
opParameter
,
const
lite
::
Context
*
ctx
,
const
kernel
::
KernelKey
&
desc
,
const
lite
::
Primitive
*
primitive
)
{
auto
*
kernel
=
new
(
std
::
nothrow
)
BatchNormOpenCLKernel
(
opParameter
,
inputs
,
outputs
);
if
(
kernel
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new BatchnormOpenCLKernel failed"
;
return
nullptr
;
}
auto
ret
=
kernel
->
Init
();
if
(
0
!=
ret
)
{
MS_LOG
(
ERROR
)
<<
"Init kernel failed, name: Convolution"
;
delete
kernel
;
return
nullptr
;
}
return
kernel
;
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_BatchNorm
,
OpenCLBatchnormKernelCreator
);
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h
0 → 100644
浏览文件 @
1c1ee660
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_BACKEND_OPENCL_BATCHNORM_H_
#define MINDSPORE_LITE_SRC_BACKEND_OPENCL_BATCHNORM_H_
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h"
namespace
mindspore
::
kernel
{
class
BatchNormOpenCLKernel
:
public
OpenCLKernel
{
public:
explicit
BatchNormOpenCLKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
)
:
OpenCLKernel
(
parameter
,
inputs
,
outputs
)
{}
~
BatchNormOpenCLKernel
()
override
{};
int
Init
()
override
;
int
ReSize
()
override
;
int
Run
()
override
;
int
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
override
;
private:
cl
::
Kernel
kernel_
;
};
}
// namespace mindspore::kernel
#endif
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
浏览文件 @
1c1ee660
...
@@ -83,6 +83,8 @@ int ConcatOpenCLKernel::Init() {
...
@@ -83,6 +83,8 @@ int ConcatOpenCLKernel::Init() {
ocl_runtime
->
LoadSource
(
program_name
,
source
);
ocl_runtime
->
LoadSource
(
program_name
,
source
);
ocl_runtime
->
BuildKernel
(
kernel_
,
program_name
,
kernel_name
,
build_options
);
ocl_runtime
->
BuildKernel
(
kernel_
,
program_name
,
kernel_name
,
build_options
);
}
}
ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
return
RET_OK
;
return
RET_OK
;
}
}
...
@@ -116,8 +118,8 @@ int ConcatOpenCLKernel::Run_axis0() {
...
@@ -116,8 +118,8 @@ int ConcatOpenCLKernel::Run_axis0() {
return
RET_OK
;
return
RET_OK
;
}
}
int
GetBiggestDividerWithPriority
(
int
number
,
int
max_divider
)
{
int
Concat
GetBiggestDividerWithPriority
(
int
number
,
int
max_divider
)
{
if
(
number
%
8
==
0
&&
8
<=
max_divider
)
{
if
(
number
%
8
==
0
&&
max_divider
>=
8
)
{
return
number
/
8
;
return
number
/
8
;
}
}
if
(
number
%
4
==
0
&&
4
<=
max_divider
)
{
if
(
number
%
4
==
0
&&
4
<=
max_divider
)
{
...
@@ -138,9 +140,9 @@ int GetBiggestDividerWithPriority(int number, int max_divider) {
...
@@ -138,9 +140,9 @@ int GetBiggestDividerWithPriority(int number, int max_divider) {
void
ConcatGetWorkGroup
(
const
std
::
vector
<
size_t
>
&
global
,
std
::
vector
<
size_t
>
*
local
,
int
max_size
)
{
void
ConcatGetWorkGroup
(
const
std
::
vector
<
size_t
>
&
global
,
std
::
vector
<
size_t
>
*
local
,
int
max_size
)
{
const
int
max_divider
=
8
;
const
int
max_divider
=
8
;
const
int
max_x
=
4
,
max_y
=
8
;
const
int
max_x
=
4
,
max_y
=
8
;
int
x
=
std
::
min
(
GetBiggestDividerWithPriority
(
global
[
0
],
max_divider
),
max_x
);
int
x
=
std
::
min
(
Concat
GetBiggestDividerWithPriority
(
global
[
0
],
max_divider
),
max_x
);
int
yz
=
max_size
/
x
;
int
yz
=
max_size
/
x
;
int
y
=
std
::
min
(
std
::
min
(
GetBiggestDividerWithPriority
(
global
[
1
],
max_divider
),
yz
),
max_y
);
int
y
=
std
::
min
(
std
::
min
(
Concat
GetBiggestDividerWithPriority
(
global
[
1
],
max_divider
),
yz
),
max_y
);
int
z
=
std
::
min
(
yz
/
y
,
static_cast
<
int
>
(
UP_DIV
(
global
[
2
],
2
)));
int
z
=
std
::
min
(
yz
/
y
,
static_cast
<
int
>
(
UP_DIV
(
global
[
2
],
2
)));
local
->
clear
();
local
->
clear
();
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
浏览文件 @
1c1ee660
...
@@ -103,16 +103,16 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
...
@@ -103,16 +103,16 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
size_t
im_dst_x
,
im_dst_y
;
size_t
im_dst_x
,
im_dst_y
;
std
::
vector
<
int
>
shapex
=
out_tensors_
[
0
]
->
shape
();
std
::
vector
<
int
>
shapex
=
out_tensors_
[
0
]
->
shape
();
if
(
out_tensors_
[
0
]
->
GetFormat
()
==
schema
::
Format_NC4HW4
)
{
if
(
out_tensors_
[
0
]
->
GetFormat
()
==
schema
::
Format_NC4HW4
)
{
int
c
=
shapex
[
1
];
int
h
=
shapex
[
2
];
int
w
=
shapex
[
3
];
im_dst_y
=
h
*
UP_DIV
(
c
,
C4NUM
);
im_dst_x
=
w
;
}
else
if
(
out_tensors_
[
0
]
->
GetFormat
()
==
schema
::
Format_NHWC4
)
{
int
h
=
shapex
[
1
];
int
h
=
shapex
[
1
];
int
w
=
shapex
[
2
];
int
w
=
shapex
[
2
];
int
c
=
shapex
[
3
];
int
c
=
shapex
[
3
];
im_dst_y
=
UP_DIV
(
h
*
c
,
C4NUM
);
im_dst_x
=
w
*
UP_DIV
(
c
,
C4NUM
);
im_dst_x
=
w
;
}
else
if
(
out_tensors_
[
0
]
->
GetFormat
()
==
schema
::
Format_NHWC4
)
{
int
h
=
shapex
[
2
];
int
w
=
shapex
[
3
];
int
c
=
shapex
[
1
];
im_dst_x
=
UP_DIV
(
w
*
c
,
C4NUM
);
im_dst_y
=
h
;
im_dst_y
=
h
;
}
else
{
}
else
{
MS_LOG
(
ERROR
)
<<
"Unsupported format. "
<<
out_tensors_
[
0
]
->
GetFormat
();
MS_LOG
(
ERROR
)
<<
"Unsupported format. "
<<
out_tensors_
[
0
]
->
GetFormat
();
...
@@ -127,9 +127,9 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
...
@@ -127,9 +127,9 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
*
img_size
=
vec
;
*
img_size
=
vec
;
return
RET_OK
;
return
RET_OK
;
}
}
int
ToFormatOpenCLKernel
::
Run
()
{
int
ToFormatOpenCLKernel
::
Run
()
{
MS_LOG
(
DEBUG
)
<<
"ToFormat"
<<
" Running!"
;
MS_LOG
(
DEBUG
)
<<
"ToFormat"
<<
" Running!"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
std
::
vector
<
size_t
>
local
=
{};
std
::
vector
<
size_t
>
local
=
{};
std
::
vector
<
size_t
>
global
;
std
::
vector
<
size_t
>
global
;
...
...
mindspore/lite/src/scheduler.cc
浏览文件 @
1c1ee660
...
@@ -31,7 +31,12 @@ int Scheduler::Schedule(const lite::Model *model, std::vector<tensor::Tensor *>
...
@@ -31,7 +31,12 @@ int Scheduler::Schedule(const lite::Model *model, std::vector<tensor::Tensor *>
// 1. op ---> kernel
// 1. op ---> kernel
// 2. sub graph
// 2. sub graph
// 3. kernels (kernels --> subGraph)
// 3. kernels (kernels --> subGraph)
int
ret
=
InitOp2Kernel
(
model
,
tensors
,
kernels
);
int
ret
=
InferShape
(
model
,
tensors
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"op infer shape failed."
;
return
RET_ERROR
;
}
ret
=
InitOp2Kernel
(
model
,
tensors
,
kernels
);
if
(
ret
!=
RET_OK
)
{
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"init op to kernel failed."
;
MS_LOG
(
ERROR
)
<<
"init op to kernel failed."
;
return
RET_ERROR
;
return
RET_ERROR
;
...
@@ -72,15 +77,12 @@ int Scheduler::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels) {
...
@@ -72,15 +77,12 @@ int Scheduler::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels) {
return
RET_OK
;
return
RET_OK
;
}
}
int
Scheduler
::
InitOp2Kernel
(
const
lite
::
Model
*
model
,
std
::
vector
<
tensor
::
Tensor
*>
*
tensors
,
int
Scheduler
::
InferShape
(
const
lite
::
Model
*
model
,
std
::
vector
<
tensor
::
Tensor
*>
*
tensors
)
{
std
::
vector
<
kernel
::
LiteKernel
*>
*
kernels
)
{
MS_EXCEPTION_IF_NULL
(
model
);
MS_EXCEPTION_IF_NULL
(
model
);
MS_EXCEPTION_IF_NULL
(
tensors
);
MS_EXCEPTION_IF_NULL
(
tensors
);
MS_EXCEPTION_IF_NULL
(
kernels
);
auto
meta_graph
=
model
->
GetMetaGraph
();
auto
meta_graph
=
model
->
GetMetaGraph
();
MS_EXCEPTION_IF_NULL
(
meta_graph
);
MS_EXCEPTION_IF_NULL
(
meta_graph
);
uint32_t
kernelCount
=
meta_graph
->
nodes
()
->
size
();
uint32_t
kernelCount
=
meta_graph
->
nodes
()
->
size
();
auto
graph_output_node_indexes
=
GetGraphOutputNodes
(
meta_graph
);
for
(
uint32_t
i
=
0
;
i
<
kernelCount
;
i
++
)
{
for
(
uint32_t
i
=
0
;
i
<
kernelCount
;
i
++
)
{
auto
cNode
=
meta_graph
->
nodes
()
->
GetAs
<
schema
::
CNode
>
(
i
);
auto
cNode
=
meta_graph
->
nodes
()
->
GetAs
<
schema
::
CNode
>
(
i
);
std
::
vector
<
tensor
::
Tensor
*>
inputs
;
std
::
vector
<
tensor
::
Tensor
*>
inputs
;
...
@@ -115,7 +117,31 @@ int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<tensor::Tenso
...
@@ -115,7 +117,31 @@ int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<tensor::Tenso
}
else
{
}
else
{
primitive
->
SetInferFlag
(
false
);
primitive
->
SetInferFlag
(
false
);
}
}
}
return
RET_OK
;
}
int
Scheduler
::
InitOp2Kernel
(
const
lite
::
Model
*
model
,
std
::
vector
<
tensor
::
Tensor
*>
*
tensors
,
std
::
vector
<
kernel
::
LiteKernel
*>
*
kernels
)
{
MS_EXCEPTION_IF_NULL
(
model
);
MS_EXCEPTION_IF_NULL
(
tensors
);
auto
meta_graph
=
model
->
GetMetaGraph
();
MS_EXCEPTION_IF_NULL
(
meta_graph
);
uint32_t
kernelCount
=
meta_graph
->
nodes
()
->
size
();
auto
graph_output_node_indexes
=
GetGraphOutputNodes
(
meta_graph
);
for
(
uint32_t
i
=
0
;
i
<
kernelCount
;
i
++
)
{
auto
cNode
=
meta_graph
->
nodes
()
->
GetAs
<
schema
::
CNode
>
(
i
);
std
::
vector
<
tensor
::
Tensor
*>
inputs
;
std
::
vector
<
tensor
::
Tensor
*>
outputs
;
auto
inIndexes
=
cNode
->
inputIndex
();
for
(
size_t
j
=
0
;
j
<
inIndexes
->
size
();
j
++
)
{
inputs
.
emplace_back
(
tensors
->
at
(
size_t
(
inIndexes
->
GetAs
<
uint32_t
>
(
j
))));
}
auto
outIndexes
=
cNode
->
outputIndex
();
for
(
size_t
j
=
0
;
j
<
outIndexes
->
size
();
j
++
)
{
outputs
.
emplace_back
(
tensors
->
at
(
size_t
(
outIndexes
->
GetAs
<
uint32_t
>
(
j
))));
}
auto
*
primitive
=
model
->
GetOp
(
cNode
->
name
()
->
str
());
auto
*
kernel
=
this
->
ScheduleNode
(
inputs
,
outputs
,
primitive
);
auto
*
kernel
=
this
->
ScheduleNode
(
inputs
,
outputs
,
primitive
);
if
(
nullptr
==
kernel
)
{
if
(
nullptr
==
kernel
)
{
MS_LOG
(
ERROR
)
<<
"ScheduleNode return nullptr, name: "
<<
cNode
->
name
()
->
str
()
MS_LOG
(
ERROR
)
<<
"ScheduleNode return nullptr, name: "
<<
cNode
->
name
()
->
str
()
...
...
mindspore/lite/src/scheduler.h
浏览文件 @
1c1ee660
...
@@ -38,6 +38,7 @@ class Scheduler {
...
@@ -38,6 +38,7 @@ class Scheduler {
private:
private:
int
InitOp2Kernel
(
const
lite
::
Model
*
model
,
std
::
vector
<
tensor
::
Tensor
*>
*
tensors
,
int
InitOp2Kernel
(
const
lite
::
Model
*
model
,
std
::
vector
<
tensor
::
Tensor
*>
*
tensors
,
std
::
vector
<
kernel
::
LiteKernel
*>
*
kernels
);
std
::
vector
<
kernel
::
LiteKernel
*>
*
kernels
);
int
InferShape
(
const
lite
::
Model
*
model
,
std
::
vector
<
tensor
::
Tensor
*>
*
tensors
);
// construct SubGraphKernel for each kernel-group in markedKernelGroup
// construct SubGraphKernel for each kernel-group in markedKernelGroup
void
ConstructSubgraphs
(
std
::
vector
<
kernel
::
LiteKernel
*>
*
kernels
);
void
ConstructSubgraphs
(
std
::
vector
<
kernel
::
LiteKernel
*>
*
kernels
);
...
...
mindspore/lite/test/CMakeLists.txt
浏览文件 @
1c1ee660
...
@@ -144,6 +144,7 @@ if (SUPPORT_GPU)
...
@@ -144,6 +144,7 @@ if (SUPPORT_GPU)
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/matmul.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/matmul.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/softmax.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/softmax.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/concat.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/concat.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/batchnorm.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/activation.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/activation.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/transpose.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/transpose.cc
...
@@ -314,6 +315,7 @@ if (SUPPORT_GPU)
...
@@ -314,6 +315,7 @@ if (SUPPORT_GPU)
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/matmul_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/matmul_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/concat_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/concat_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/softmax_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/softmax_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
0 → 100644
浏览文件 @
1c1ee660
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h"
namespace
mindspore
{
class
TestBatchnormOpenCL
:
public
mindspore
::
CommonTest
{
public:
TestBatchnormOpenCL
()
{}
};
template
<
typename
T
>
void
CompareOutputData1
(
T
*
output_data
,
T
*
correct_data
,
int
size
,
float
err_bound
)
{
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
T
abs
=
fabs
(
output_data
[
i
]
-
correct_data
[
i
]);
ASSERT_LE
(
abs
,
err_bound
);
}
}
TEST_F
(
TestBatchnormOpenCL
,
Batchnorminput_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"Read tensors from .bin"
;
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
48
};
std
::
vector
<
int
>
output_shape
=
{
1
,
256
,
256
,
48
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/in_data.bin"
;
std
::
string
mean_path
=
"./test_data/mean.bin"
;
std
::
string
var_path
=
"./test_data/var.bin"
;
std
::
string
output_path
=
"./test_data/out_data.bin"
;
auto
input_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
size_t
mean_size
,
var_size
;
auto
mean_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
mean_path
.
c_str
(),
&
mean_size
));
auto
var_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
var_path
.
c_str
(),
&
var_size
));
MS_LOG
(
INFO
)
<<
"construct tensors"
;
lite
::
tensor
::
Tensor
*
tensor_data
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_mean
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_var
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_scale
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_offset
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
if
(
tensor_data
==
nullptr
||
tensor_mean
==
nullptr
||
tensor_var
==
nullptr
||
tensor_scale
==
nullptr
||
tensor_offset
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
return
;
}
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC4
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
return
;
}
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
=
{
tensor_data
,
tensor_scale
,
tensor_offset
,
tensor_mean
,
tensor_var
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
MS_LOG
(
INFO
)
<<
"initialize tensors"
;
auto
param
=
new
(
std
::
nothrow
)
BatchNormParameter
();
if
(
param
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new BatchNormParameter failed"
;
return
;
}
param
->
epsilon_
=
pow
(
10
,
-
5
);
auto
*
batchnorm_kernel
=
new
(
std
::
nothrow
)
kernel
::
BatchNormOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
batchnorm_kernel
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::BatchNorm_kernel failed"
;
return
;
}
batchnorm_kernel
->
Init
();
// to do allocate memory for inputs and outputs
for
(
auto
&
input_tensor
:
inputs
)
{
input_tensor
->
MallocData
(
allocator
);
}
MS_LOG
(
INFO
)
<<
"initialize sub_graph"
;
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
batchnorm_kernel
};
auto
*
sub_graph
=
new
(
std
::
nothrow
)
kernel
::
SubGraphOpenCLKernel
(
inputs
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
sub_graph
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::SubGraphOpenCLKernel failed"
;
return
;
}
sub_graph
->
Init
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
std
::
cout
<<
"init tensors"
<<
std
::
endl
;
memcpy
(
inputs
[
0
]
->
Data
(),
input_data
,
input_size
);
auto
&
temp
=
inputs
[
1
];
auto
tensor_temp
=
reinterpret_cast
<
float
*>
(
temp
->
Data
());
int
UPDIV_tensor_scale
=
UP_DIV
(
tensor_scale
->
ElementsNum
(),
C4NUM
)
*
4
;
for
(
int
i
=
0
;
i
<
UPDIV_tensor_scale
;
++
i
)
{
tensor_temp
[
i
]
=
static_cast
<
float
>
(
1
);
}
memcpy
(
inputs
[
3
]
->
Data
(),
mean_data
,
mean_size
);
memcpy
(
inputs
[
4
]
->
Data
(),
var_data
,
var_size
);
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
output_data_gpu
,
correct_data
,
output_tensor
->
ElementsNum
(),
0.0001
);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录