Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
mindspore
提交
937dbf8b
M
mindspore
项目概览
MindSpore
/
mindspore
通知
35
Star
15
Fork
15
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
937dbf8b
编写于
9月 02, 2020
作者:
W
wandongdong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add gpu benchmark
上级
c2ff5e3f
变更
9
隐藏空白更改
内联
并排
Showing
9 changed files
with
61 additions
and
13 deletions
+61
-13
mindspore/lite/src/lite_kernel.h
mindspore/lite/src/lite_kernel.h
+0
-5
mindspore/lite/src/runtime/kernel/opencl/cl/transpose.cl
mindspore/lite/src/runtime/kernel/opencl/cl/transpose.cl
+14
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
...spore/lite/src/runtime/kernel/opencl/kernel/activation.cc
+1
-1
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
...lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
+1
-1
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+1
-1
mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
+3
-0
mindspore/lite/test/models_tflite_gpu.cfg
mindspore/lite/test/models_tflite_gpu.cfg
+3
-0
mindspore/lite/test/run_benchmark_nets.sh
mindspore/lite/test/run_benchmark_nets.sh
+37
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
...re/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
+1
-1
未找到文件。
mindspore/lite/src/lite_kernel.h
浏览文件 @
937dbf8b
...
...
@@ -27,11 +27,6 @@
#include "src/ir/tensor.h"
#include "include/errorcode.h"
#ifdef ENABLE_FP16
using
FLOAT_t
=
float16_t
;
#else
using
FLOAT_t
=
float
;
#endif
// using mindspore::kernel::AddressPtr;
namespace
mindspore
::
kernel
{
...
...
mindspore/lite/src/runtime/kernel/opencl/cl/transpose.cl
浏览文件 @
937dbf8b
...
...
@@ -55,10 +55,20 @@ __kernel void transpose_NHWC4_BUF(__read_only image2d_t src_data, global FLT4 *d
result[1] = (FLT4)(0.0f);
result[2] = (FLT4)(0.0f);
result[3] = (FLT4)(0.0f);
FLT4 x0 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X) % W * C.y + Y, (4 * X) / W));
FLT4 x1 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X + 1) % W * C.y + Y, (4 * X + 1) / W));
FLT4 x2 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X + 2) % W * C.y + Y, (4 * X + 2) / W));
FLT4 x3 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X + 3) % W * C.y + Y, (4 * X + 3) / W));
bool over_size = W * C.y > 65535;
FLT4 x0, x1, x2, x3;
if (over_size) {
x0 = READ_IMAGE(src_data, smp_zero, (int2)(C, 4 * X));
x1 = READ_IMAGE(src_data, smp_zero, (int2)(C, 4 * X + 1));
x2 = READ_IMAGE(src_data, smp_zero, (int2)(C, 4 * X + 2));
x3 = READ_IMAGE(src_data, smp_zero, (int2)(C, 4 * X + 3));
} else {
x0 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X) % W * C.y + Y, (4 * X) / W));
x1 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X + 1) % W * C.y + Y, (4 * X + 1) / W));
x2 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X + 2) % W * C.y + Y, (4 * X + 2) / W));
x3 = READ_IMAGE(src_data, smp_zero, (int2)((4 * X + 3) % W * C.y + Y, (4 * X + 3) / W));
}
result[0].x = x0.x;
result[0].y = x1.x;
result[0].z = x2.x;
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
浏览文件 @
937dbf8b
...
...
@@ -44,7 +44,7 @@ void ActivationOpenClKernel::InitBuffer() {
alpha_buff_
=
allocator
->
MapBuffer
(
alpha_buff_
,
CL_MAP_WRITE
,
nullptr
,
true
);
memset
(
alpha_buff_
,
0x00
,
fp_size
);
if
(
enable_fp16_
)
{
auto
fp16
=
(
float16_t
)
alpha_
;
auto
fp16
=
(
int16_t
)
alpha_
;
memcpy
(
alpha_buff_
,
&
fp16
,
fp_size
);
}
else
{
memcpy
(
alpha_buff_
,
&
alpha_
,
fp_size
);
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
浏览文件 @
937dbf8b
...
...
@@ -72,7 +72,7 @@ void Conv2dTransposeOpenCLKernel::PadWeight() {
int
div_ci
=
UP_DIV
(
ci
,
C4NUM
);
int
div_co
=
UP_DIV
(
co
,
C4NUM
);
auto
allocator
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
()
->
GetAllocator
();
auto
data_size
=
enable_fp16_
?
sizeof
(
float16_t
)
:
sizeof
(
float
);
auto
data_size
=
enable_fp16_
?
sizeof
(
int16_t
)
:
sizeof
(
float
);
// IHWO to OHWI4(I)4(O)(converter format is IHWO)
// init padWeight_(buffer mem)
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
浏览文件 @
937dbf8b
...
...
@@ -75,7 +75,7 @@ int MatMulOpenCLKernel::ReSize() { return RET_OK; }
void
MatMulOpenCLKernel
::
PadWeight
()
{
auto
allocator
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
()
->
GetAllocator
();
size_t
dtype_size
=
enable_fp16_
?
sizeof
(
float16_t
)
:
sizeof
(
float
);
size_t
dtype_size
=
enable_fp16_
?
sizeof
(
int16_t
)
:
sizeof
(
float
);
padWeight_
=
allocator
->
Malloc
(
sizeCI
.
s
[
1
]
*
sizeCO
.
s
[
1
]
*
C4NUM
*
C4NUM
*
dtype_size
);
padWeight_
=
allocator
->
MapBuffer
(
padWeight_
,
CL_MAP_WRITE
,
nullptr
,
true
);
memset
(
padWeight_
,
0x00
,
sizeCI
.
s
[
1
]
*
sizeCO
.
s
[
1
]
*
C4NUM
*
C4NUM
*
dtype_size
);
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
浏览文件 @
937dbf8b
...
...
@@ -27,6 +27,7 @@ using mindspore::lite::KernelRegistrar;
using
mindspore
::
lite
::
RET_ERROR
;
using
mindspore
::
lite
::
RET_OK
;
using
mindspore
::
schema
::
PrimitiveType_Reshape
;
using
mindspore
::
schema
::
PrimitiveType_Squeeze
;
namespace
mindspore
::
kernel
{
...
...
@@ -142,4 +143,6 @@ kernel::LiteKernel *OpenCLReshapeKernelCreator(const std::vector<lite::tensor::T
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Reshape
,
OpenCLReshapeKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_Reshape
,
OpenCLReshapeKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Squeeze
,
OpenCLReshapeKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_Squeeze
,
OpenCLReshapeKernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/test/models_tflite_gpu.cfg
0 → 100644
浏览文件 @
937dbf8b
mobilenet_v1_1.0_224.tflite
mobilenet_v2_1.0_224.tflite
resnet.tflite
\ No newline at end of file
mindspore/lite/test/run_benchmark_nets.sh
浏览文件 @
937dbf8b
...
...
@@ -310,6 +310,42 @@ function Run_arm64() {
fi
#sleep 1
done
<
${
models_tflite_awaretraining_config
}
# Run gpu tflite converted models:
while
read
line
;
do
model_name
=
${
line
}
if
[[
$model_name
==
\#
*
]]
;
then
continue
fi
echo
${
model_name
}
>>
"
${
run_benchmark_log_file
}
"
echo
'cd /data/local/tmp/benchmark_test'
>
adb_run_cmd.txt
echo
'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='
${
model_name
}
'.ms --inDataPath=/data/local/tmp/input_output/input/'
${
model_name
}
'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'
${
model_name
}
'.ms.out --warmUpLoopCount=1 --loopCount=1'
>>
"
${
run_benchmark_log_file
}
"
echo
'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='
${
model_name
}
'.ms --inDataPath=/data/local/tmp/input_output/input/'
${
model_name
}
'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'
${
model_name
}
'.ms.out --warmUpLoopCount=1 --loopCount=1'
>>
adb_run_cmd.txt
adb
-s
${
device_id
}
shell < adb_run_cmd.txt
>>
"
${
run_benchmark_log_file
}
"
if
[
$?
=
0
]
;
then
run_result
=
'arm64_gpu: '
${
model_name
}
' pass'
echo
${
run_result
}
>>
${
run_benchmark_result_file
}
else
run_result
=
'arm64_gpu: '
${
model_name
}
' failed'
echo
${
run_result
}
>>
${
run_benchmark_result_file
}
return
1
fi
# run benchmark test without calib data
#echo ${model_name}
echo
'cd /data/local/tmp/benchmark_test'
>
adb_run_cmd.txt
echo
'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='
${
model_name
}
'.ms --warmUpLoopCount=1 --loopCount=2'
>>
"
${
run_benchmark_log_file
}
"
echo
'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='
${
model_name
}
'.ms --warmUpLoopCount=1 --loopCount=2'
>>
adb_run_cmd.txt
adb
-s
${
device_id
}
shell < adb_run_cmd.txt
>>
"
${
run_benchmark_log_file
}
"
if
[
$?
=
0
]
;
then
run_result
=
'arm64_gpu: '
${
model_name
}
' pass'
echo
${
run_result
}
>>
${
run_benchmark_result_file
}
else
run_result
=
'arm64_gpu: '
${
model_name
}
' failed'
echo
${
run_result
}
>>
${
run_benchmark_result_file
}
return
1
fi
#sleep 1
done
<
${
models_tflite_gpu_config
}
}
# Print start msg before run testcase
...
...
@@ -397,6 +433,7 @@ models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg
models_onnx_config
=
${
basepath
}
/models_onnx.cfg
models_fp16_config
=
${
basepath
}
/models_fp16.cfg
models_mindspore_config
=
${
basepath
}
/models_mindspore.cfg
models_tflite_gpu_config
=
${
basepath
}
/models_tflite_gpu.cfg
Convert_status
=
0
rm
-rf
${
basepath
}
/ms_models
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
浏览文件 @
937dbf8b
...
...
@@ -36,7 +36,7 @@ void RunTestCaseMatMul(const std::vector<int> &shape, void *input_data, void *we
size_t
dtype_size
=
sizeof
(
float
);
if
(
enable_fp16
)
{
ocl_runtime
->
SetFp16Enable
(
true
);
dtype_size
=
sizeof
(
float16_t
);
dtype_size
=
sizeof
(
int16_t
);
}
auto
allocator
=
ocl_runtime
->
GetAllocator
();
int
ci
=
shape
[
0
];
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录