Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
c263eff4
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c263eff4
编写于
9月 08, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
9月 08, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5876 [MS][LITE][Develop] add new ops named arithmetic_self
Merge pull request !5876 from pengyongrong/arithmetic_self
上级
947247bb
bb5f02d3
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
882 addition
and
1 deletion
+882
-1
mindspore/lite/src/runtime/kernel/opencl/cl/arithmeticself.cl
...spore/lite/src/runtime/kernel/opencl/cl/arithmeticself.cl
+450
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
.../lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
+240
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h
...e/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h
+52
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
+5
-1
mindspore/lite/test/CMakeLists.txt
mindspore/lite/test/CMakeLists.txt
+2
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc
...est/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc
+133
-0
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/arithmeticself.cl
0 → 100644
浏览文件 @
c263eff4
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
ArithmeticSelf_ElementAbs_NHWC4
(
__read_only
image2d_t
input0,
__write_only
image2d_t
output,
int4
output_shape
)
{
int
X
=
get_global_id
(
0
)
; // N*H
int
Y
=
get_global_id
(
1
)
; // W
int
Z
=
get_global_id
(
2
)
; // c/4
if
(
X
>=
output_shape.x
*
output_shape.y
|
| Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = result.x >= 0 ? result.x : -result.x;
result.y = result.y >= 0 ? result.y : -result.y;
result.z = result.z >= 0 ? result.z : -result.z;
result.w = result.w >= 0 ? result.w : -result.w;
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementAbs_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = result.x >= 0 ? result.x : -result.x;
result.y = result.y >= 0 ? result.y : -result.y;
result.z = result.z >= 0 ? result.z : -result.z;
result.w = result.w >= 0 ? result.w : -result.w;
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementCos_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = cos(result.x);
result.y = cos(result.y);
result.z = cos(result.z);
result.w = cos(result.w);
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementCos_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = cos(result.x);
result.y = cos(result.y);
result.z = cos(result.z);
result.w = cos(result.w);
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementSin_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = sin(result.x);
result.y = sin(result.y);
result.z = sin(result.z);
result.w = sin(result.w);
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementSin_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = sin(result.x);
result.y = sin(result.y);
result.z = sin(result.z);
result.w = sin(result.w);
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementTanh_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = tanh(result.x);
result.y = tanh(result.y);
result.z = tanh(result.z);
result.w = tanh(result.w);
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementTanh_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = tanh(result.x);
result.y = tanh(result.y);
result.z = tanh(result.z);
result.w = tanh(result.w);
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementNeg_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = -result.x;
result.y = -result.y;
result.z = -result.z;
result.w = -result.w;
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementNeg_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = -result.x;
result.y = -result.y;
result.z = -result.z;
result.w = -result.w;
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementExp_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = exp(result.x);
result.y = exp(result.y);
result.z = exp(result.z);
result.w = exp(result.w);
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementExp_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = exp(result.x);
result.y = exp(result.y);
result.z = exp(result.z);
result.w = exp(result.w);
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementLog_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = result.x > 0 ? log(result.x) : HUGE_VALF;
result.y = result.y > 0 ? log(result.y) : HUGE_VALF;
result.z = result.z > 0 ? log(result.z) : HUGE_VALF;
result.w = result.w > 0 ? log(result.w) : HUGE_VALF;
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementLog_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = result.x > 0 ? log(result.x) : HUGE_VALF;
result.y = result.y > 0 ? log(result.y) : HUGE_VALF;
result.z = result.z > 0 ? log(result.z) : HUGE_VALF;
result.w = result.w > 0 ? log(result.w) : HUGE_VALF;
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementSquare_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = result.x * result.x;
result.y = result.y * result.y;
result.z = result.z * result.z;
result.w = result.w * result.w;
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementSquare_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = result.x * result.x;
result.y = result.y * result.y;
result.z = result.z * result.z;
result.w = result.w * result.w;
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementSqrt_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = result.x > 0 ? sqrt(result.x) : HUGE_VALF;
result.y = result.y > 0 ? sqrt(result.y) : HUGE_VALF;
result.z = result.z > 0 ? sqrt(result.z) : HUGE_VALF;
result.w = result.w > 0 ? sqrt(result.w) : HUGE_VALF;
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementSqrt_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = result.x > 0 ? sqrt(result.x) : HUGE_VALF;
result.y = result.y > 0 ? sqrt(result.y) : HUGE_VALF;
result.z = result.z > 0 ? sqrt(result.z) : HUGE_VALF;
result.w = result.w > 0 ? sqrt(result.w) : HUGE_VALF;
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementRsqrt_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = result.x > 0 ? 1.0f / sqrt(result.x) : HUGE_VALF;
result.y = result.y > 0 ? 1.0f / sqrt(result.y) : HUGE_VALF;
result.z = result.z > 0 ? 1.0f / sqrt(result.z) : HUGE_VALF;
result.w = result.w > 0 ? 1.0f / sqrt(result.w) : HUGE_VALF;
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementRsqrt_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = result.x > 0 ? 1.0f / sqrt(result.x) : HUGE_VALF;
result.y = result.y > 0 ? 1.0f / sqrt(result.y) : HUGE_VALF;
result.z = result.z > 0 ? 1.0f / sqrt(result.z) : HUGE_VALF;
result.w = result.w > 0 ? 1.0f / sqrt(result.w) : HUGE_VALF;
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementLogicalNot_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = result.x > 0 || result.x < 0 ? false : true;
result.y = result.y > 0 || result.y < 0 ? false : true;
result.z = result.z > 0 || result.z < 0 ? false : true;
result.w = result.w > 0 || result.w < 0 ? false : true;
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementLogicalNot_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = result.x > 0 || result.x < 0 ? false : true;
result.y = result.y > 0 || result.y < 0 ? false : true;
result.z = result.z > 0 || result.z < 0 ? false : true;
result.w = result.w > 0 || result.w < 0 ? false : true;
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementFloor_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = floor(result.x);
result.y = floor(result.y);
result.z = floor(result.z);
result.w = floor(result.w);
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementFloor_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = floor(result.x);
result.y = floor(result.y);
result.z = floor(result.z);
result.w = floor(result.w);
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementCeil_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = ceil(result.x);
result.y = ceil(result.y);
result.z = ceil(result.z);
result.w = ceil(result.w);
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementCeil_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y), (Z * output_shape.y + X)));
result.x = ceil(result.x);
result.y = ceil(result.y);
result.z = ceil(result.z);
result.w = ceil(result.w);
WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result);
}
__kernel void ArithmeticSelf_ElementRound_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
return;
}
FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*output_shape.w + Z, (X)));
result.x = round(result.x);
result.y = round(result.y);
result.z = round(result.z);
result.w = round(result.w);
WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
__kernel void ArithmeticSelf_ElementRound_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
int4 output_shape) {
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W
int Z = get_global_id(2); // c/4
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z |
|
Z
>=
output_shape.w
)
{
return
;
}
FLT4
result
=
READ_IMAGE
(
input0,
smp_none,
(
int2
)((
Y
)
,
(
Z
*
output_shape.y
+
X
)))
;
result.x
=
round
(
result.x
)
;
result.y
=
round
(
result.y
)
;
result.z
=
round
(
result.z
)
;
result.w
=
round
(
result.w
)
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
,
(
Z
*
output_shape.y
+
X
))
,
result
)
;
}
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
0 → 100644
浏览文件 @
c263eff4
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstring>
#include <algorithm>
#include <set>
#include<string>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/arithmetic_self.h"
#include "src/runtime/kernel/opencl/cl/arithmeticself.cl.inc"
using
mindspore
::
kernel
::
KERNEL_ARCH
::
kGPU
;
using
mindspore
::
lite
::
KernelRegistrar
;
using
mindspore
::
schema
::
PrimitiveType_Abs
;
using
mindspore
::
schema
::
PrimitiveType_Ceil
;
using
mindspore
::
schema
::
PrimitiveType_Cos
;
using
mindspore
::
schema
::
PrimitiveType_Exp
;
using
mindspore
::
schema
::
PrimitiveType_Floor
;
using
mindspore
::
schema
::
PrimitiveType_Log
;
using
mindspore
::
schema
::
PrimitiveType_LogicalNot
;
using
mindspore
::
schema
::
PrimitiveType_Neg
;
using
mindspore
::
schema
::
PrimitiveType_Round
;
using
mindspore
::
schema
::
PrimitiveType_Rsqrt
;
using
mindspore
::
schema
::
PrimitiveType_Sin
;
using
mindspore
::
schema
::
PrimitiveType_Sqrt
;
using
mindspore
::
schema
::
PrimitiveType_Square
;
namespace
mindspore
::
kernel
{
int
ArithmeticSelfOpenCLKernel
::
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
{
size_t
CO4
=
UP_DIV
(
out_tensors_
[
0
]
->
Channel
(),
C4NUM
);
size_t
im_dst_x
,
im_dst_y
;
if
(
in_tensors_
[
0
]
->
GetFormat
()
==
schema
::
Format_NHWC4
)
{
im_dst_x
=
out_tensors_
[
0
]
->
Width
()
*
CO4
;
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
out_tensors_
[
0
]
->
Batch
();
}
else
{
im_dst_y
=
out_tensors_
[
0
]
->
Batch
()
*
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
size_t
img_dtype
=
CL_FLOAT
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
return
RET_OK
;
}
void
ArithmeticSelfOpenCLKernel
::
GetKernelName
(
std
::
string
*
kernel_name
,
ArithmeticSelfParameter
*
param
)
{
switch
(
param
->
op_parameter_
.
type_
)
{
case
PrimitiveType_Abs
:
kernel_name
[
0
]
+=
"_ElementAbs"
;
break
;
case
PrimitiveType_Cos
:
kernel_name
[
0
]
+=
"_ElementCos"
;
break
;
case
PrimitiveType_Exp
:
kernel_name
[
0
]
+=
"_ElementExp"
;
break
;
case
PrimitiveType_Log
:
kernel_name
[
0
]
+=
"_ElementLog"
;
break
;
case
PrimitiveType_Square
:
kernel_name
[
0
]
+=
"_ElementSquare"
;
break
;
case
PrimitiveType_Sqrt
:
kernel_name
[
0
]
+=
"_ElementSqrt"
;
break
;
case
PrimitiveType_Rsqrt
:
kernel_name
[
0
]
+=
"_ElementRsqrt"
;
break
;
case
PrimitiveType_Sin
:
kernel_name
[
0
]
+=
"_ElementSin"
;
break
;
case
PrimitiveType_LogicalNot
:
kernel_name
[
0
]
+=
"_ElementLogicalNot"
;
break
;
case
PrimitiveType_Floor
:
kernel_name
[
0
]
+=
"_ElementFloor"
;
break
;
case
PrimitiveType_Ceil
:
kernel_name
[
0
]
+=
"_ElementCeil"
;
break
;
case
PrimitiveType_Round
:
kernel_name
[
0
]
+=
"_ElementRound"
;
case
PrimitiveType_Neg
:
kernel_name
[
0
]
+=
"_ElementNeg"
;
break
;
default:
break
;
}
}
int
ArithmeticSelfOpenCLKernel
::
Init
()
{
if
(
in_tensors_
[
0
]
->
shape
().
size
()
!=
4
)
{
MS_LOG
(
ERROR
)
<<
" only support dim = 4 "
;
return
RET_ERROR
;
}
auto
param
=
reinterpret_cast
<
ArithmeticSelfParameter
*>
(
this
->
op_parameter_
);
auto
in_format
=
op_format_
;
if
(
in_format
!=
schema
::
Format_NHWC4
&&
in_format
!=
schema
::
Format_NC4HW4
)
{
MS_LOG
(
ERROR
)
<<
"input format("
<<
in_format
<<
") "
<<
"format not support!"
;
return
RET_ERROR
;
}
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
in_tensors_
[
0
]
->
SetFormat
(
op_format_
);
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
out_tensors_
[
0
]
->
SetFormat
(
op_format_
);
std
::
string
kernel_name
=
"ArithmeticSelf"
;
GetKernelName
(
&
kernel_name
,
param
);
if
(
in_format
==
schema
::
Format_NC4HW4
)
{
kernel_name
+=
"_NC4HW4"
;
}
else
if
(
in_format
==
schema
::
Format_NHWC4
)
{
kernel_name
+=
"_NHWC4"
;
}
MS_LOG
(
DEBUG
)
<<
"execute kernel name : "
<<
kernel_name
;
std
::
set
<
std
::
string
>
build_options
;
std
::
string
source
=
arithmeticself_source
;
std
::
string
program_name
=
"ArithmeticSelf"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
LoadSource
(
program_name
,
source
);
ocl_runtime
->
BuildKernel
(
kernel_
,
program_name
,
kernel_name
,
build_options
);
return
RET_OK
;
}
int
ArithmeticSelfOpenCLKernel
::
ReSize
()
{
return
RET_OK
;
}
int
ArithmeticSelfGetBiggestDividerWithPriority
(
int
number
,
int
max_divider
)
{
if
(
number
%
8
==
0
&&
max_divider
>=
8
)
{
return
number
/
8
;
}
if
(
number
%
4
==
0
&&
4
<=
max_divider
)
{
return
number
/
4
;
}
if
(
number
%
2
==
0
&&
2
<=
max_divider
)
{
return
number
/
2
;
}
for
(
int
i
=
max_divider
;
i
!=
0
;
i
--
)
{
if
(
number
%
i
==
0
)
{
return
i
;
}
}
return
RET_OK
;
}
void
ArithmeticSelfGetWorkGroup
(
const
std
::
vector
<
size_t
>
&
global
,
std
::
vector
<
size_t
>
*
local
,
int
max_size
)
{
const
int
max_divider
=
8
;
const
int
max_x
=
4
,
max_y
=
8
;
int
x
=
std
::
min
(
ArithmeticSelfGetBiggestDividerWithPriority
(
global
[
0
],
max_divider
),
max_x
);
int
yz
=
max_size
/
x
;
int
y
=
std
::
min
(
std
::
min
(
ArithmeticSelfGetBiggestDividerWithPriority
(
global
[
1
],
max_divider
),
yz
),
max_y
);
int
z
=
std
::
min
(
yz
/
y
,
static_cast
<
int
>
(
UP_DIV
(
global
[
2
],
2
)));
local
->
clear
();
local
->
push_back
(
x
);
local
->
push_back
(
y
);
local
->
push_back
(
z
);
}
int
ArithmeticSelfOpenCLKernel
::
Run
()
{
MS_LOG
(
DEBUG
)
<<
this
->
name
()
<<
" Running! "
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
output_shape
=
out_tensors_
[
0
]
->
shape
();
cl_int4
output_shape_
=
{
output_shape
[
0
],
output_shape
[
1
],
output_shape
[
2
],
UP_DIV
(
output_shape
[
3
],
C4NUM
)};
uint32_t
OH
=
output_shape
[
0
]
*
output_shape
[
1
];
// N*H
uint32_t
OW
=
output_shape
[
2
];
uint32_t
OC
=
UP_DIV
(
output_shape
[
3
],
C4NUM
);
const
std
::
vector
<
size_t
>
&
max_global
=
ocl_runtime
->
GetWorkItemSize
();
std
::
vector
<
size_t
>
local
=
{
1
,
1
,
1
};
// init local
std
::
vector
<
size_t
>
global
=
{
OH
,
OW
,
OC
};
ArithmeticSelfGetWorkGroup
(
global
,
&
local
,
max_global
[
0
]);
int
arg_cn
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cn
++
,
output_shape_
);
ocl_runtime
->
RunKernel
(
kernel_
,
global
,
local
,
nullptr
);
return
RET_OK
;
}
kernel
::
LiteKernel
*
OpenCLArithmeticSelfKernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
OpParameter
*
opParameter
,
const
lite
::
Context
*
ctx
,
const
kernel
::
KernelKey
&
desc
,
const
mindspore
::
lite
::
PrimitiveC
*
primitive
)
{
auto
*
kernel
=
new
(
std
::
nothrow
)
ArithmeticSelfOpenCLKernel
(
opParameter
,
inputs
,
outputs
);
if
(
kernel
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
" new ArithmeticSelfOpenCLKernel failed "
;
return
nullptr
;
}
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
" Init kernel failed, name: ArithmeticSelf "
;
delete
kernel
;
return
nullptr
;
}
return
kernel
;
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Abs
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Ceil
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Cos
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Exp
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Floor
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Log
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_LogicalNot
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Round
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Rsqrt
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Sin
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Neg
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Sqrt
,
OpenCLArithmeticSelfKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Square
,
OpenCLArithmeticSelfKernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h
0 → 100644
浏览文件 @
c263eff4
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_ARITHMETIC_SELF_PARAMETER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_ARITHMETIC_SELF_PARAMETER_H_
#include <vector>
#include<string>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/arithmetic_self_parameter.h"
namespace
mindspore
::
kernel
{
class
ArithmeticSelfOpenCLKernel
:
public
OpenCLKernel
{
public:
explicit
ArithmeticSelfOpenCLKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
)
:
OpenCLKernel
(
parameter
,
inputs
,
outputs
)
{}
~
ArithmeticSelfOpenCLKernel
()
override
{};
int
Init
()
override
;
int
ReSize
()
override
;
int
Run
()
override
;
int
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
override
;
void
GetKernelName
(
std
::
string
*
kernel_name
,
ArithmeticSelfParameter
*
param
);
private:
cl
::
Kernel
kernel_
;
};
}
// namespace mindspore::kernel
#endif
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
浏览文件 @
c263eff4
...
...
@@ -114,7 +114,11 @@ int TransposeOpenCLKernel::Run() {
cl_int2
C
=
{
c
,
c4
};
int
arg_idx
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
out_tensors_
[
0
]
->
Data
());
if
(
out_mem_type_
==
OpenCLMemType
::
BUF
)
{
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
out_tensors_
[
0
]
->
Data
(),
lite
::
opencl
::
MemType
::
BUF
);
}
else
{
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
out_tensors_
[
0
]
->
Data
());
}
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
HW
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
C
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
w
);
...
...
mindspore/lite/test/CMakeLists.txt
浏览文件 @
c263eff4
...
...
@@ -150,6 +150,7 @@ if (SUPPORT_GPU)
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/matmul.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/softmax.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/concat.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/batchnorm.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/slice.cc
${
LITE_DIR
}
/src/runtime/kernel/opencl/kernel/activation.cc
...
...
@@ -334,6 +335,7 @@ if (SUPPORT_GPU)
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/concat_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/slice_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/softmax_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
${
TEST_DIR
}
/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc
0 → 100644
浏览文件 @
c263eff4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h"
namespace
mindspore
{
class
TestArithmeticSelfOpenCLfp16
:
public
mindspore
::
CommonTest
{
public:
TestArithmeticSelfOpenCLfp16
()
{}
};
template
<
typename
T
>
void
CompareOutputData1
(
T
*
input_data1
,
T
*
output_data
,
T
*
correct_data
,
int
size
,
float
err_bound
)
{
for
(
size_t
i
=
0
;
i
<
100
;
i
++
)
{
T
abs
=
fabs
(
output_data
[
i
]
-
correct_data
[
i
]);
ASSERT_LE
(
abs
,
err_bound
);
}
}
TEST_F
(
TestArithmeticSelfOpenCLfp16
,
ArithmeticSelfOpenCLFp16
)
{
MS_LOG
(
INFO
)
<<
" begin test "
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
SetFp16Enable
(
true
);
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
// get the input from .bin
size_t
input1_size
,
output_size
;
std
::
string
input1Ppath
=
"./test_data/in_arithmetic_selffp16.bin"
;
std
::
string
correctOutputPath
=
"./test_data/out_arithmetic_selffp16.bin"
;
auto
input_data1
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
input1Ppath
.
c_str
(),
&
input1_size
));
auto
correctOutput
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
correctOutputPath
.
c_str
(),
&
output_size
));
MS_LOG
(
INFO
)
<<
" init tensors "
;
std
::
vector
<
int
>
shape
=
{
1
,
19
,
19
,
96
};
auto
data_type
=
kNumberTypeFloat16
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
auto
*
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
shape
,
schema
::
Format_NHWC
,
tensor_type
);
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
shape
,
schema
::
Format_NHWC
,
tensor_type
);
if
(
input_tensor
==
nullptr
||
output_tensor
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
" new input_tensor or output_tensor failed "
;
return
;
}
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
input_tensor
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
MS_LOG
(
INFO
)
<<
" initialize param "
;
auto
param
=
new
(
std
::
nothrow
)
ArithmeticSelfParameter
();
if
(
param
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
" new ConcatParameter failed "
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
return
;
}
param
->
op_parameter_
.
type_
=
schema
::
PrimitiveType_Neg
;
auto
*
arithmeticself_kernel
=
new
(
std
::
nothrow
)
kernel
::
ArithmeticSelfOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
arithmeticself_kernel
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
" new kernel::ArithmeticSelfOpenCLKernel failed "
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
return
;
}
arithmeticself_kernel
->
SetFormatType
(
schema
::
Format_NC4HW4
);
arithmeticself_kernel
->
Init
();
// to do allocate memory for inputs and outputs
for
(
auto
&
input_tensor
:
inputs
)
{
input_tensor
->
MallocData
(
allocator
);
}
MS_LOG
(
INFO
)
<<
" initialize sub_graph "
;
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
arithmeticself_kernel
};
auto
*
sub_graph
=
new
(
std
::
nothrow
)
kernel
::
SubGraphOpenCLKernel
(
inputs
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
sub_graph
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
" new kernel::SubGraphOpenCLKernel failed "
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
arithmeticself_kernel
;
return
;
}
sub_graph
->
Init
();
MS_LOG
(
INFO
)
<<
" initialize input data "
;
memcpy
(
inputs
[
0
]
->
Data
(),
input_data1
,
input1_size
);
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float16_t
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
input_data1
,
output_data_gpu
,
correctOutput
,
output_tensor
->
ElementsNum
(),
0.000001
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
arithmeticself_kernel
;
delete
sub_graph
;
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录