Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
00bf6808
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
00bf6808
编写于
8月 04, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 04, 2020
浏览文件
操作
浏览文件
下载
差异文件
!3871 add opengl arthimitic
Merge pull request !3871 from chenzhongming/master
上级
5b7875ba
cbf7f21b
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
141 addition
and
105 deletion
+141
-105
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic.cl
...pore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic.cl
+0
-49
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_buffer.cl
...te/src/runtime/kernel/opencl/cl/fp32/arithmetic_buffer.cl
+51
-0
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
...e/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
+15
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
...spore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
+65
-51
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h
+10
-5
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic.cl
已删除
100644 → 0
浏览文件 @
5b7875ba
__kernel
void
ArithmeticAdd
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
id
=
get_global_id
(
0
)
;
if
(
id
<
n
)
{
output[id]
=
input_a[id]
+
input_b[id]
;
}
}
__kernel
void
ArithmeticSub
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
id
=
get_global_id
(
0
)
;
if
(
id
<
n
)
{
output[id]
=
input_a[id]
-
input_b[id]
;
}
}
__kernel
void
ArithmeticMul
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
id
=
get_global_id
(
0
)
;
if
(
id
<
n
)
{
output[id]
=
input_a[id]
*
input_b[id]
;
}
}
__kernel
void
ArithmeticDiv
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
id
=
get_global_id
(
0
)
;
if
(
id
<
n
)
{
output[id]
=
input_a[id]
*
input_b[id]
;
}
}
__kernel
void
ArithmeticBiasAdd
(
__global
float4
*input,
__global
float4
*output,
const
float
weight,
const
float
bias,
const
unsigned
int
n
)
{
int
id
=
get_global_id
(
0
)
;
float4
bias_vec
=
(
float4
)(
bias,
0.0f,
.
0f,
.
0f
)
;
float4
weight_vec
=
(
float4
)(
weight,
0.0f,
.
0f,
.
0f
)
;
if
(
id
<
n
)
{
output[id]
=
weight_vec
*
input[id]
+
bias_vec
;
}
}
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_buffer.cl
0 → 100644
浏览文件 @
00bf6808
__kernel
void
ElementAdd
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
+
input_b[idx]
;
}
__kernel
void
ElementSub
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
-
input_b[idx]
;
}
__kernel
void
ElementMul
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
*
input_b[idx]
;
}
__kernel
void
ElementDiv
(
__global
float
*input_a,
__global
float
*input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
*
input_b[idx]
;
}
__kernel
void
BoardcastAdd
(
__global
float
*input_a,
float
input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
+
input_b
;
}
__kernel
void
BoardcastSub
(
__global
float
*input_a,
float
input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
-
input_b
;
}
__kernel
void
BoardcastMul
(
__global
float
*input_a,
float
input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
*
input_b
;
}
__kernel
void
BoardcastDiv
(
__global
float
*input_a,
float
input_b,
__global
float
*output,
const
unsigned
int
n
)
{
int
idx
=
get_global_id
(
0
)
;
if
(
idx
>=
n
)
return
;
output[idx]
=
input_a[idx]
*
input_b
;
}
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
0 → 100644
浏览文件 @
00bf6808
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
ElementAdd
(
__read_only
image2d_t
*input_a,
__read_only
image2d_t
*input_b,
__write_only
image2d_t
*output,
const
int4
output_shape
)
{
int
X
=
get_global_id
(
0
)
;
int
Y
=
get_global_id
(
1
)
;
int
Z
=
get_global_id
(
2
)
;
if
(
X
>=
output_shape.x
|
| Y >= output_shape.y |
|
Z
>=
output_shape.w
)
return
;
if
(
idx
>=
n
)
return
;
float4
a
=
read_imagef
(
input_a,
smp_none,
(
int2
)(
X,
Y
*
output_shape.w
+
Z
))
;
float4
b
=
read_imagef
(
input_b,
smp_none,
(
int2
)(
X,
Y
*
output_shape.w
+
Z
))
;
src
=
a
+
b
;
write_imagef
(
output,
(
int2
)(
0
,
0
)
,
src
)
;
}
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
浏览文件 @
00bf6808
/**
* Copyright 20
19
Huawei Technologies Co., Ltd
* Copyright 20
20
Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
...
...
@@ -13,15 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/opencl/kernel/arithmetic.h"
#include <set>
#include <vector>
#include <string>
#include <set>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/kernel/arithmetic.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/fp32/arithmetic.cl.inc"
#include "src/runtime/kernel/opencl/cl/fp32/arithmetic_buffer.cl.inc"
#include "src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl.inc"
#endif
using
mindspore
::
kernel
::
KERNEL_ARCH
::
kGPU
;
...
...
@@ -29,44 +30,53 @@ using mindspore::lite::KernelRegistrar;
namespace
mindspore
::
kernel
{
std
::
vector
<
size_t
>
ArithmeticOpenCLKernel
::
InitGlobalSize
()
const
{
const
size_t
global_x
=
outputs_
[
0
]
->
Width
();
const
size_t
global_y
=
outputs_
[
0
]
->
Height
();
const
size_t
global_z
=
UP_ROUND_DIV
(
outputs_
[
0
]
->
Channel
(),
4
);
std
::
vector
<
size_t
>
global
=
{
global_x
,
global_y
,
global_z
};
return
global
;
}
void
ArithmeticOpenCLKernel
::
Image2dGetWorkGroupSize
()
{
global_size_
=
InitGlobalSize
();
int
max_work_group_size
=
runtime_
->
GetKernelMaxWorkGroupSize
(
kernel_
(),
(
*
runtime_
->
Device
())());
local_size_
=
GetLocalSize
(
global_size_
,
max_work_group_size
);
global_size_
=
GetGlobalSize
(
local_size_
,
global_size_
);
}
void
ArithmeticOpenCLKernel
::
BufferGetWorkGroupSize
()
{
uint32_t
element_num
=
outputs_
[
0
]
->
ElementsC4Num
();
global_size_
=
{
element_num
};
}
int
ArithmeticOpenCLKernel
::
Init
()
{
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
std
::
string
kernel_name
=
"ArithmeticAdd"
;
runtime_
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
std
::
string
element_name
;
std
::
string
boardcast_name
;
is_bias_add_
=
false
;
if
(
inputs_
[
1
]
->
TensorType
()
==
schema
::
NodeType_ValueNode
&&
inputs_
[
1
]
->
Data
()
!=
nullptr
)
{
kernel_name
=
"ArithmeticBiasAdd"
;
is_bias_add_
=
true
;
element_flag_
=
false
;
}
else
{
element_flag_
=
true
;
}
switch
(
opParameter
->
type_
)
{
case
PrimitiveType_Mul
:
if
(
is_bias_add_
)
{
weight_
=
static_cast
<
float
*>
(
inputs_
[
1
]
->
Data
())[
0
];
break
;
}
kernel_name
=
"ArithmeticMul"
;
element_name
=
"ElementMul"
;
boardcast_name
=
"BoardcastMul"
;
break
;
case
PrimitiveType_Add
:
if
(
is_bias_add_
)
{
bias_
=
static_cast
<
float
*>
(
inputs_
[
1
]
->
Data
())[
0
];
break
;
}
kernel_name
=
"ArithmeticAdd"
;
element_name
=
"ElementAdd"
;
boardcast_name
=
"BoardcastAdd"
;
break
;
case
PrimitiveType_Sub
:
if
(
is_bias_add_
)
{
bias_
=
-
1
*
static_cast
<
float
*>
(
inputs_
[
1
]
->
Data
())[
0
];
break
;
}
kernel_name
=
"ArithmeticSub"
;
element_name
=
"ElementSub"
;
boardcast_name
=
"BoardcastSub"
;
break
;
case
PrimitiveType_Div
:
if
(
is_bias_add_
)
{
weight_
=
1
/
static_cast
<
float
*>
(
inputs_
[
1
]
->
Data
())[
0
];
break
;
}
kernel_name
=
"ArithmeticDiv"
;
element_name
=
"ElementDiv"
;
boardcast_name
=
"BoardcastDiv"
;
break
;
default:
MS_LOG
(
ERROR
)
<<
"Error Operator type "
<<
opParameter
->
type_
;
...
...
@@ -74,39 +84,44 @@ int ArithmeticOpenCLKernel::Init() {
}
#ifdef PROGRAM_WITH_IL
ocl_runtime
->
CreateKernelFromIL
(
kernel_
(),
kernel_name
);
runtime_
->
CreateKernelFromIL
(
kernel_
(),
kernel_name
);
#else
std
::
string
program_name
=
"Arithmetic"
;
std
::
set
<
std
::
string
>
build_options
;
std
::
string
source
=
arithmetic_source_fp32
;
ocl_runtime
->
LoadSource
(
program_name
,
source
);
ocl_runtime
->
BuildKernel
(
kernel_
,
program_name
,
kernel_name
,
build_options
);
std
::
string
source
=
arithmetic_buffer_source_fp32
;
runtime_
->
LoadSource
(
program_name
,
source
);
if
(
element_flag_
)
{
runtime_
->
BuildKernel
(
kernel_
,
program_name
,
element_name
,
build_options
);
MS_LOG
(
DEBUG
)
<<
element_name
<<
" Init Done!"
;
}
else
{
runtime_
->
BuildKernel
(
kernel_
,
program_name
,
boardcast_name
,
build_options
);
MS_LOG
(
DEBUG
)
<<
boardcast_name
<<
" Init Done!"
;
}
#endif
outputs_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
MS_LOG
(
DEBUG
)
<<
kernel_name
<<
" Init Done!"
;
return
0
;
}
int
ArithmeticOpenCLKernel
::
Run
()
{
MS_LOG
(
DEBUG
)
<<
this
->
Name
()
<<
" Running!"
;
auto
runtime_
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
BufferGetWorkGroupSize
();
int
arg_idx
=
0
;
uint32_t
element_num
=
outputs_
[
0
]
->
ElementsC4Num
();
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
std
::
vector
<
size_t
>
global
=
{
element_num
};
std
::
vector
<
size_t
>
local
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
0
,
inputs_
[
0
]
->
Data
());
if
(
is_bias_add_
)
{
MS_LOG
(
DEBUG
)
<<
"weight: "
<<
weight_
<<
" bias: "
<<
bias_
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
1
,
outputs_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
2
,
weight_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
3
,
bias_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
4
,
element_num
/
C4NUM
);
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
inputs_
[
0
]
->
Data
());
if
(
element_flag_
)
{
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
inputs_
[
1
]
->
Data
());
}
else
{
ocl_runtime
->
SetKernelArg
(
kernel_
,
1
,
inputs_
[
1
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
2
,
outputs_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
3
,
element_num
);
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
static_cast
<
float
*>
(
inputs_
[
1
]
->
Data
())[
0
]);
}
return
ocl_runtime
->
RunKernel
(
kernel_
,
global
,
local
,
nullptr
);
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
outputs_
[
0
]
->
Data
());
runtime_
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
element_num
);
runtime_
->
RunKernel
(
kernel_
,
global_size_
,
local_size_
,
nullptr
);
return
0
;
}
kernel
::
LiteKernel
*
OpenCLArithmeticKernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
...
...
@@ -132,4 +147,3 @@ REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Add, OpenCLArithmeticKernelCr
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Sub
,
OpenCLArithmeticKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Div
,
OpenCLArithmeticKernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h
浏览文件 @
00bf6808
...
...
@@ -28,18 +28,23 @@ class ArithmeticOpenCLKernel : public ArithmeticCPUKernel {
explicit
ArithmeticOpenCLKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
lite
::
Context
*
ctx
)
:
ArithmeticCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
)
{}
~
ArithmeticOpenCLKernel
()
override
{};
~
ArithmeticOpenCLKernel
()
override
{};
int
Init
()
override
;
int
Run
()
override
;
private:
std
::
vector
<
size_t
>
InitGlobalSize
()
const
;
void
Image2dGetWorkGroupSize
();
void
BufferGetWorkGroupSize
();
cl
::
Kernel
kernel_
;
bool
is_bias_add_
{
false
};
float
weight_
{
1.
f
};
float
bias_
{
.0
f
};
lite
::
opencl
::
OpenCLRuntime
*
runtime_
;
bool
element_flag_
{
true
};
std
::
vector
<
size_t
>
local_size_
;
std
::
vector
<
size_t
>
global_size_
;
};
}
// namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_BACKEND_OPENCL_ARITHMETIC_H_
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录