Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
a3b45b71
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a3b45b71
编写于
9月 07, 2020
作者:
C
chenzupeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bug in leakyrelu
上级
77dd91a6
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
37 addition
and
79 deletion
+37
-79
mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl
mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl
+7
-22
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
...spore/lite/src/runtime/kernel/opencl/kernel/activation.cc
+2
-50
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
+27
-6
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
...ite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
+1
-1
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl
浏览文件 @
a3b45b71
...
...
@@ -5,33 +5,18 @@
#
define
MIN
(
X,
Y
)
(
X
<
Y
?
X
:
Y
)
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__kernel
void
LeakyRelu
_NHWC4
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
const
int4
img_shape,
__global
FLT4
*alpha,
const
int4
input_shape
)
{
__kernel
void
LeakyRelu
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
const
int4
img_shape,
const
float
alpha
)
{
int
Y
=
get_global_id
(
0
)
; // H
int
X
=
get_global_id
(
1
)
; // W C4
if
(
X
>=
img_shape.z
|
| Y >= img_shape.y) return;
int C = X % UP_DIV(input_shape.w, SLICES);
FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y));
FLT4 tmp;
tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * alpha[C].x;
tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * alpha[C].y;
tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * alpha[C].z;
tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * alpha[C].w;
WRITE_IMAGE(output, (int2)(X, Y), tmp);
}
__kernel void LeakyRelu_NC4HW4(__read_only image2d_t input, __write_only image2d_t output, const int4 img_shape,
__global FLT4 *alpha, const int4 input_shape) {
int Y = get_global_id(0); // C4 H
int X = get_global_id(1); // W
if (X >= img_shape.z || Y >= img_shape.y) return;
int C = Y / input_shape.y;
FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y));
FLT4 tmp;
tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * alpha[C].x;
tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * alpha[C].y;
tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * alpha[C].z;
tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * alpha[C].w;
FLT alpha_f = TO_FLT(alpha);
tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * alpha_f;
tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * alpha_f;
tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * alpha_f;
tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * alpha_f;
WRITE_IMAGE(output, (int2)(X, Y), tmp);
}
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
浏览文件 @
a3b45b71
...
...
@@ -39,46 +39,7 @@ using mindspore::schema::PrimitiveType_Activation;
namespace
mindspore
::
kernel
{
void
ActivationOpenClKernel
::
InitBuffer
()
{
auto
allocator
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
()
->
GetAllocator
();
int
elem_num
=
UP_ROUND
(
nhwc_shape_
[
3
],
C4NUM
);
alpha_buff_
=
allocator
->
Malloc
(
elem_num
*
fp_size
);
alpha_buff_
=
allocator
->
MapBuffer
(
alpha_buff_
,
CL_MAP_WRITE
,
nullptr
,
true
);
memset
(
alpha_buff_
,
0x00
,
elem_num
*
fp_size
);
if
(
in_tensors_
.
size
()
==
1
)
{
if
(
enable_fp16_
)
{
uint16_t
alpha_fp16
=
Float32ToShort
(
alpha_
);
auto
alpha_buff_fp16
=
reinterpret_cast
<
uint16_t
*>
(
alpha_buff_
);
for
(
int
i
=
0
;
i
<
nhwc_shape_
[
3
];
i
++
)
{
alpha_buff_fp16
[
i
]
=
alpha_fp16
;
}
}
else
{
auto
alpha_buff_fp16
=
reinterpret_cast
<
float
*>
(
alpha_buff_
);
for
(
int
i
=
0
;
i
<
nhwc_shape_
[
3
];
i
++
)
{
alpha_buff_fp16
[
i
]
=
alpha_
;
}
}
}
else
{
if
(
enable_fp16_
)
{
if
(
in_tensors_
[
1
]
->
data_type
()
==
kNumberTypeFloat32
)
{
auto
alpha_buff_fp16
=
reinterpret_cast
<
uint16_t
*>
(
alpha_buff_
);
for
(
int
i
=
0
;
i
<
nhwc_shape_
[
3
];
i
++
)
{
alpha_buff_fp16
[
i
]
=
Float32ToShort
(
reinterpret_cast
<
float
*>
(
in_tensors_
[
0
]
->
Data
())[
i
]);
}
}
else
{
memcpy
(
alpha_buff_
,
in_tensors_
[
0
]
->
Data
(),
nhwc_shape_
[
3
]
*
fp_size
);
}
}
else
{
if
(
in_tensors_
[
1
]
->
data_type
()
==
kNumberTypeFloat16
)
{
MS_LOG
(
WARNING
)
<<
"fp16 model run in fp32 mode not support."
;
memcpy
(
alpha_buff_
,
in_tensors_
[
0
]
->
Data
(),
nhwc_shape_
[
3
]
*
fp_size
);
}
else
{
memcpy
(
alpha_buff_
,
in_tensors_
[
0
]
->
Data
(),
nhwc_shape_
[
3
]
*
fp_size
);
}
}
}
allocator
->
UnmapBuffer
(
alpha_buff_
);
}
void
ActivationOpenClKernel
::
InitBuffer
()
{}
int
ActivationOpenClKernel
::
Init
()
{
in_size_
=
in_tensors_
[
0
]
->
shape
().
size
();
...
...
@@ -102,9 +63,6 @@ int ActivationOpenClKernel::Init() {
MS_LOG
(
ERROR
)
<<
"Activate fun only support dim=4 or 2, but your dim="
<<
in_size_
;
return
RET_ERROR
;
}
if
(
type_
==
ActivationType_LEAKY_RELU
)
{
InitBuffer
();
}
std
::
map
<
int
,
std
::
vector
<
std
::
string
>>
Program_Kernel
{
{
ActivationType_LEAKY_RELU
,
std
::
vector
<
std
::
string
>
{
"LEAKY_RELU"
,
"LeakyRelu"
}},
{
ActivationType_RELU
,
std
::
vector
<
std
::
string
>
{
"RELU"
,
"Relu"
}},
...
...
@@ -119,9 +77,6 @@ int ActivationOpenClKernel::Init() {
std
::
set
<
std
::
string
>
build_options
;
ocl_runtime
->
LoadSource
(
Program_Kernel
[
type_
][
0
],
source
);
std
::
string
kernel_name
=
Program_Kernel
[
type_
][
1
];
if
(
type_
==
ActivationType_LEAKY_RELU
)
{
kernel_name
+=
"_"
+
std
::
string
(
EnumNameFormat
(
op_format_
));
}
ocl_runtime
->
BuildKernel
(
kernel_
,
Program_Kernel
[
type_
][
0
],
kernel_name
,
build_options
);
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
...
...
@@ -140,10 +95,7 @@ int ActivationOpenClKernel::Run() {
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
img2d_shape
);
if
(
type_
==
ActivationType_LEAKY_RELU
)
{
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
alpha_buff_
,
lite
::
opencl
::
MemType
::
BUF
);
cl_int4
input_shape
=
{
static_cast
<
int
>
(
nhwc_shape_
[
0
]),
static_cast
<
int
>
(
nhwc_shape_
[
1
]),
static_cast
<
int
>
(
nhwc_shape_
[
2
]),
static_cast
<
int
>
(
nhwc_shape_
[
3
])};
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
input_shape
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
alpha_
);
}
std
::
vector
<
size_t
>
local
=
{};
std
::
vector
<
size_t
>
global
=
{
static_cast
<
size_t
>
(
img2d_shape
.
s
[
1
]),
static_cast
<
size_t
>
(
img2d_shape
.
s
[
2
])};
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
浏览文件 @
a3b45b71
...
...
@@ -22,6 +22,7 @@
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/fp32/common_func.h"
#include "src/runtime/kernel/opencl/kernel/prelu.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/cl/prelu.cl.inc"
...
...
@@ -35,18 +36,38 @@ using mindspore::schema::PrimitiveType_PReLU;
namespace
mindspore
::
kernel
{
void
PReluOpenCLKernel
::
InitBuffer
()
{
int
C
=
in_tensors_
[
1
]
->
shape
()[
0
];
int
div_ci
=
UP_DIV
(
C
,
C4NUM
);
auto
allocator
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
()
->
GetAllocator
();
int
elem_num
=
in_tensors_
[
0
]
->
shape
().
size
()
==
2
?
in_tensors_
[
0
]
->
shape
()[
1
]
:
in_tensors_
[
0
]
->
shape
()[
3
];
int
elem_num_c4
=
UP_DIV
(
elem_num
,
C4NUM
);
size_t
img_dtype
=
CL_FLOAT
;
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
std
::
vector
<
size_t
>
img_size
{
size_t
(
div_ci
),
1
,
img_dtype
};
PReluWeight_
=
allocator
->
Malloc
(
div_ci
*
C4NUM
*
fp_size
,
img_size
);
std
::
vector
<
size_t
>
img_size
{
size_t
(
elem_num_c4
),
1
,
img_dtype
};
PReluWeight_
=
allocator
->
Malloc
(
elem_num_c4
*
C4NUM
*
fp_size
,
img_size
);
PReluWeight_
=
allocator
->
MapBuffer
(
PReluWeight_
,
CL_MAP_WRITE
,
nullptr
,
true
);
memset
(
PReluWeight_
,
0x00
,
div_ci
*
C4NUM
*
fp_size
);
memcpy
(
PReluWeight_
,
in_tensors_
[
1
]
->
Data
(),
C
*
fp_size
);
memset
(
PReluWeight_
,
0x00
,
elem_num_c4
*
C4NUM
*
fp_size
);
if
(
enable_fp16_
)
{
if
(
in_tensors_
[
1
]
->
data_type
()
==
kNumberTypeFloat32
)
{
auto
PReluWeight_fp16
=
reinterpret_cast
<
uint16_t
*>
(
PReluWeight_
);
auto
in_tensor_data_fp32
=
reinterpret_cast
<
float
*>
(
in_tensors_
[
1
]
->
Data
());
for
(
int
i
=
0
;
i
<
elem_num
;
i
++
)
{
PReluWeight_fp16
[
i
]
=
Float32ToShort
(
in_tensor_data_fp32
[
i
]);
}
}
else
{
memcpy
(
PReluWeight_
,
in_tensors_
[
1
]
->
Data
(),
elem_num
*
fp_size
);
}
}
else
{
if
(
in_tensors_
[
1
]
->
data_type
()
==
kNumberTypeFloat16
)
{
auto
PReluWeight_fp32
=
reinterpret_cast
<
float
*>
(
PReluWeight_
);
auto
in_tensor_data_fp16
=
reinterpret_cast
<
uint16_t
*>
(
in_tensors_
[
1
]
->
Data
());
for
(
int
i
=
0
;
i
<
elem_num
;
i
++
)
{
PReluWeight_fp32
[
i
]
=
ShortToFloat32
(
in_tensor_data_fp16
[
i
]);
}
}
else
{
memcpy
(
PReluWeight_
,
in_tensors_
[
1
]
->
Data
(),
elem_num
*
fp_size
);
}
}
allocator
->
UnmapBuffer
(
PReluWeight_
);
}
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
浏览文件 @
a3b45b71
...
...
@@ -432,7 +432,7 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
// need modify
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
schema
::
Format
format
=
schema
::
Format_NC
;
// need modify
schema
::
Format
op_format
=
schema
::
Format_NC4
;
// need modify
schema
::
Format
op_format
=
schema
::
Format_N
HW
C4
;
// need modify
auto
*
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
format
,
tensor_type
);
if
(
input_tensor
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new input tensor error!"
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录