Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
6cc1d13d
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6cc1d13d
编写于
10月 23, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update opencl kernel
上级
2a7274f4
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
8 additions
and
116 deletions
+8
-116
mace/core/BUILD
mace/core/BUILD
+4
-19
mace/core/runtime/opencl/opencl_smoketest.cc
mace/core/runtime/opencl/opencl_smoketest.cc
+0
-75
mace/kernels/opencl/cl/conv_2d_1x1.cl
mace/kernels/opencl/cl/conv_2d_1x1.cl
+3
-12
mace/kernels/opencl/cl/simple_add.cl
mace/kernels/opencl/cl/simple_add.cl
+0
-9
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+1
-1
未找到文件。
mace/core/BUILD
浏览文件 @
6cc1d13d
...
@@ -14,15 +14,11 @@ cc_library(
...
@@ -14,15 +14,11 @@ cc_library(
srcs
=
glob
([
srcs
=
glob
([
"runtime/opencl/cl.hpp"
,
"runtime/opencl/cl.hpp"
,
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/opencl_allocator.cc"
,
"runtime/opencl/*.cc"
,
"runtime/opencl/opencl_wrapper.cc"
,
]),
"runtime/opencl/opencl_runtime.cc"
,
hdrs
=
glob
([
"runtime/opencl/*.h"
,
]),
]),
hdrs
=
[
"runtime/opencl/opencl_allocator.h"
,
"runtime/opencl/opencl_runtime.h"
,
"runtime/opencl/opencl_wrapper.h"
,
],
copts
=
[
"-std=c++11"
],
copts
=
[
"-std=c++11"
],
deps
=
[
deps
=
[
"core"
,
"core"
,
...
@@ -31,17 +27,6 @@ cc_library(
...
@@ -31,17 +27,6 @@ cc_library(
alwayslink
=
1
,
alwayslink
=
1
,
)
)
cc_binary
(
name
=
"opencl_smoketest"
,
srcs
=
glob
([
"runtime/opencl/opencl_smoketest.cc"
,
]),
copts
=
[
"-std=c++11"
],
deps
=
[
"opencl_runtime"
,
],
)
cc_library
(
cc_library
(
name
=
"core"
,
name
=
"core"
,
srcs
=
glob
([
srcs
=
glob
([
...
...
mace/core/runtime/opencl/opencl_smoketest.cc
已删除
100644 → 0
浏览文件 @
2a7274f4
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/logging.h"
#include "mace/core/operator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
int
main
()
{
using
namespace
mace
;
auto
runtime
=
mace
::
OpenCLRuntime
::
Get
();
mace
::
Tensor
ta
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataType
::
DT_INT32
);
mace
::
Tensor
tb
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataType
::
DT_INT32
);
mace
::
Tensor
tc
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataType
::
DT_INT32
);
mace
::
Tensor
tstep
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataType
::
DT_INT32
);
int
n
=
1000
;
std
::
vector
<
index_t
>
shape
=
{
n
};
ta
.
Resize
(
shape
);
tb
.
Resize
(
shape
);
tc
.
Resize
(
shape
);
tstep
.
Resize
({
1
});
int
step_size
=
10
;
int
global_size
=
n
/
step_size
;
{
mace
::
Tensor
::
MappingGuard
ta_mapper
(
&
ta
);
mace
::
Tensor
::
MappingGuard
tb_mapper
(
&
tb
);
mace
::
Tensor
::
MappingGuard
tstep_mapper
(
&
tstep
);
int32_t
*
a
=
ta
.
mutable_data
<
int32_t
>
();
int32_t
*
b
=
tb
.
mutable_data
<
int32_t
>
();
int32_t
*
step
=
tstep
.
mutable_data
<
int32_t
>
();
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
a
[
i
]
=
i
;
b
[
i
]
=
2
*
i
;
}
step
[
0
]
=
step_size
;
}
auto
program
=
runtime
->
program
();
auto
simple_add
=
cl
::
KernelFunctor
<
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
>
(
program
,
"simple_add"
);
cl_int
error
;
simple_add
(
cl
::
EnqueueArgs
(
runtime
->
command_queue
(),
cl
::
NDRange
(
global_size
),
cl
::
NullRange
),
*
(
static_cast
<
cl
::
Buffer
*>
(
ta
.
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
tb
.
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
tc
.
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
tstep
.
buffer
())),
error
);
if
(
error
!=
0
)
{
LOG
(
ERROR
)
<<
"Failed to execute kernel "
<<
error
;
}
{
mace
::
Tensor
::
MappingGuard
ta_mapper
(
&
ta
);
mace
::
Tensor
::
MappingGuard
tb_mapper
(
&
tb
);
mace
::
Tensor
::
MappingGuard
tc_mapper
(
&
tc
);
int32_t
*
a
=
ta
.
mutable_data
<
int32_t
>
();
int32_t
*
b
=
tb
.
mutable_data
<
int32_t
>
();
int32_t
*
c
=
tc
.
mutable_data
<
int32_t
>
();
bool
correct
=
true
;
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
if
(
c
[
i
]
!=
a
[
i
]
+
b
[
i
])
correct
=
false
;
}
LOG
(
INFO
)
<<
"OpenCL test result: "
<<
(
correct
?
"correct"
:
"incorrect"
);
}
return
0
;
}
mace/kernels/opencl/cl/conv_2d_1x1.cl
浏览文件 @
6cc1d13d
...
@@ -28,20 +28,11 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */
...
@@ -28,20 +28,11 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */
for
(
int
out_chan
=
out_chan_begin
; out_chan < out_chan_end; ++out_chan) {
for
(
int
out_chan
=
out_chan_begin
; out_chan < out_chan_end; ++out_chan) {
float
weights
=
filter[out_chan
*
in_chan_num
+
in_chan]
;
float
weights
=
filter[out_chan
*
in_chan_num
+
in_chan]
;
float
*output_ptr
=
output_base
+
out_chan
*
pixel_num
;
float
*output_ptr
=
output_base
+
out_chan
*
pixel_num
;
/*
TODO
fix
vload/vstore
*/
/*
for
(
int
p
=
0
; p < 2; ++p) {
for
(
int
p
=
0
; p < 2; ++p) {
float4
in
=
vload4
(
p
*
4
,
input_ptr
)
;
float4
in
=
vload4
(
p,
input_ptr
)
;
float4
out
=
vload4
(
p
*
4
,
output_ptr
)
;
float4
out
=
vload4
(
p,
output_ptr
)
;
out
+=
in
*
weights
;
out
+=
in
*
weights
;
vstore4
(
out,
p
*
4
,
output_ptr
)
;
vstore4
(
out,
p,
output_ptr
)
;
}
*/
for
(
int
p
=
0
; p < 8; ++p) {
float
in
=
input_ptr[p]
;
float
out
=
output_ptr[p]
;
out
+=
in
*
weights
;
output_ptr[p]
=
out
;
}
}
}
}
}
else
{
}
else
{
...
...
mace/kernels/opencl/cl/simple_add.cl
已删除
100644 → 0
浏览文件 @
2a7274f4
/*
 * Element-wise integer addition, one contiguous chunk per work-item.
 *
 * Work-item `g` (its id in dimension 0) handles indices
 * [step[0] * g, step[0] * g + step[0]) and stores a[i] + b[i] into c[i].
 * The kernel performs no bounds checking; the host must size the buffers
 * and the global range so that every chunk lies inside a, b and c.
 */
void kernel simple_add(global const int *a,
                       global const int *b,
                       global int *c,
                       global const int *step) {
  const int chunk = step[0];
  const int work_item = get_global_id(0);
  const int first = chunk * work_item;
  const int last = first + chunk;

  int idx = first;
  while (idx < last) {
    c[idx] = a[idx] + b[idx];
    idx++;
  }
}
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
6cc1d13d
...
@@ -81,7 +81,7 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
...
@@ -81,7 +81,7 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
cl_int
error
;
cl_int
error
;
conv_2d
(
cl
::
EnqueueArgs
(
runtime
->
command_queue
(),
conv_2d
(
cl
::
EnqueueArgs
(
runtime
->
command_queue
(),
cl
::
NDRange
(
chan_blk_num
,
pixel_blk_num
),
cl
::
NDRange
(
chan_blk_num
,
pixel_blk_num
),
cl
::
N
ullRange
),
cl
::
N
DRange
(
1
,
64
)
),
*
(
static_cast
<
cl
::
Buffer
*>
(
input
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
input
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
filter
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
filter
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录