Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
d05bd35a
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
d05bd35a
编写于
3月 02, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'feature_wuch' into 'master'
multi-input concat opencl kernel See merge request !258
上级
a7ee7f0f
2d79d5c3
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
83 addition
and
4 deletion
+83
-4
mace/kernels/opencl/cl/concat.cl
mace/kernels/opencl/cl/concat.cl
+16
-0
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+61
-3
mace/ops/concat_test.cc
mace/ops/concat_test.cc
+6
-1
未找到文件。
mace/kernels/opencl/cl/concat.cl
浏览文件 @
d05bd35a
...
...
@@ -71,6 +71,22 @@ __kernel void concat_channel(__read_only image2d_t input0,
WRITE_IMAGET
(
output,
(
int2
)(
mad24
(
chan_blk_idx,
width,
width_idx
)
,
hb_idx
)
,
data
)
;
}
//
Required:
All
input
channels
are
divisible
by
4
__kernel
void
concat_channel_multi
(
__read_only
image2d_t
input,
__private
const
int
chan_blk_offset,
__write_only
image2d_t
output
)
{
const
int
chan_blk_idx
=
get_global_id
(
0
)
;
const
int
width_idx
=
get_global_id
(
1
)
;
const
int
width
=
get_global_size
(
1
)
;
const
int
hb_idx
=
get_global_id
(
2
)
;
DATA_TYPE4
data
=
0
;
data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
mad24
(
chan_blk_idx,
width,
width_idx
)
,
hb_idx
))
;
WRITE_IMAGET
(
output,
(
int2
)(
mad24
(
chan_blk_idx
+
chan_blk_offset,
width,
width_idx
)
,
hb_idx
)
,
data
)
;
}
//__kernel
void
concat_width
(
__read_only
image2d_t
input0,
//
__read_only
image2d_t
input1,
//
__private
const
int
input0_width,
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
d05bd35a
...
...
@@ -63,21 +63,71 @@ static void Concat2(cl::Kernel *kernel,
TuningOrRun3DKernel
(
*
kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
}
static
void
ConcatN
(
cl
::
Kernel
*
kernel
,
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
const
DataType
dt
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
height
=
output
->
dim
(
1
);
const
index_t
width
=
output
->
dim
(
2
);
const
index_t
channel
=
output
->
dim
(
3
);
const
int
channel_blk
=
RoundUpDiv4
(
channel
);
if
(
kernel
->
get
()
==
nullptr
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"concat_channel_multi"
);
built_options
.
emplace
(
"-Dconcat_channel_multi="
+
kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToCLCMDDt
(
dt
));
*
kernel
=
runtime
->
BuildKernel
(
"concat"
,
kernel_name
,
built_options
);
}
const
int
inputs_count
=
input_list
.
size
();
index_t
chan_blk_offset
=
0
;
for
(
int
i
=
0
;
i
<
inputs_count
;
++
i
)
{
const
Tensor
*
input
=
input_list
[
i
];
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input
->
buffer
())));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
chan_blk_offset
));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
buffer
())));
index_t
input_channel_blk
=
input
->
dim
(
3
)
/
4
;
chan_blk_offset
+=
input_channel_blk
;
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
input_channel_blk
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
batch
*
height
),
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"concat_n_opencl_kernel_"
<<
input_channel_blk
<<
"_"
<<
width
<<
"_"
<<
batch
*
height
;
TuningOrRun3DKernel
(
*
kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
}
}
template
<
typename
T
>
void
ConcatFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
int
inputs_count
=
input_list
.
size
();
MACE_CHECK
(
inputs_count
=
=
2
&&
axis_
==
3
)
<<
"Concat opencl kernel only support
two
elements with axis == 3"
;
MACE_CHECK
(
inputs_count
>
=
2
&&
axis_
==
3
)
<<
"Concat opencl kernel only support
>=2
elements with axis == 3"
;
const
Tensor
*
input0
=
input_list
[
0
];
bool
divisible_four
=
input0
->
dim
(
axis_
)
%
4
==
0
;
std
::
vector
<
index_t
>
output_shape
(
input0
->
shape
());
for
(
int
i
=
1
;
i
<
inputs_count
;
++
i
)
{
const
Tensor
*
input
=
input_list
[
i
];
MACE_CHECK
(
input
->
dim_size
()
==
input0
->
dim_size
(),
"Ranks of all input tensors must be same."
);
divisible_four
&=
input
->
dim
(
axis_
)
%
4
==
0
;
for
(
int
j
=
0
;
j
<
input
->
dim_size
();
++
j
)
{
if
(
j
==
axis_
)
{
continue
;
...
...
@@ -87,6 +137,8 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(const std::vector<const Te
}
output_shape
[
axis_
]
+=
input
->
dim
(
axis_
);
}
MACE_CHECK
(
inputs_count
==
2
||
divisible_four
,
"Dimensions of inputs should be divisible by 4 when inputs_count > 2."
);
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
output
->
ResizeImage
(
output_shape
,
image_shape
);
...
...
@@ -96,7 +148,13 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(const std::vector<const Te
Concat2
(
&
kernel_
,
input_list
[
0
],
input_list
[
1
],
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
break
;
default:
MACE_NOT_IMPLEMENTED
;
default:
if
(
divisible_four
)
{
ConcatN
(
&
kernel_
,
input_list
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
}
};
...
...
mace/ops/concat_test.cc
浏览文件 @
d05bd35a
...
...
@@ -143,7 +143,7 @@ template <typename T>
void
OpenclRandomTest
(
const
std
::
vector
<
std
::
vector
<
index_t
>>
&
shapes
,
const
int
axis
)
{
srand
(
time
(
nullptr
));
int
num_inputs
=
2
;
int
num_inputs
=
shapes
.
size
()
;
int
concat_axis_size
=
0
;
// Construct graph
OpsTestNet
net
;
...
...
@@ -212,3 +212,8 @@ TEST_F(ConcatOpTest, OPENCLHalfAligned) {
TEST_F
(
ConcatOpTest
,
OPENCLUnAligned
)
{
OpenclRandomTest
<
float
>
({{
3
,
32
,
32
,
13
},
{
3
,
32
,
32
,
17
}},
3
);
}
TEST_F
(
ConcatOpTest
,
OPENCLAlignedMultiInput
)
{
OpenclRandomTest
<
float
>
({{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
}},
3
);
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录