慢慢CG / Mace (forked from Xiaomi / Mace)

Commit 4743a1e6
Authored Dec 04, 2017 by liuqi
Parent: 5c1264b3

Finish concat opencl kernel(just support channel dim).
Showing 8 changed files with 421 additions and 54 deletions (+421 −54).
    mace/core/runtime/opencl/opencl_runtime.cc   +1   −0
    mace/kernels/concat.h                        +53  −13
    mace/kernels/opencl/cl/concat.cl             +100 −0
    mace/kernels/opencl/concat.cc                +103 −0
    mace/ops/concat.cc                           +14  −2
    mace/ops/concat.h                            +6   −32
    mace/ops/concat_benchmark.cc                 +56  −4
    mace/ops/concat_test.cc                      +88  −3
mace/core/runtime/opencl/opencl_runtime.cc  (+1 −0)

@@ -147,6 +147,7 @@ const std::map<std::string, std::string>
     {"depthwise_conv_3x3", "depthwise_conv_3x3.cl"},
     {"pooling", "pooling.cl"},
     {"relu", "relu.cl"},
+    {"concat", "concat.cl"},
     {"resize_bilinear", "resize_bilinear.cl"},
     {"space_to_batch", "space_to_batch.cl"},
     {"buffer_to_image", "buffer_to_image.cl"},
mace/kernels/concat.h  (+53 −13)

@@ -8,25 +8,57 @@
 #include "mace/core/common.h"
 #include "mace/core/types.h"
 #include "mace/proto/mace.pb.h"
+#include "mace/core/tensor.h"
 
 namespace mace {
 namespace kernels {
 
 template <DeviceType D, typename T>
 struct ConcatFunctor {
-  void operator()(std::vector<const T *> &input_list,
-                  const index_t inner_dim,
-                  const index_t *outer_dims,
-                  T *output) {
-    const size_t input_count = input_list.size();
-    for (int inner_idx = 0; inner_idx < inner_dim; ++inner_idx) {
-      for (size_t i = 0; i < input_count; ++i) {
+  void operator()(const std::vector<const Tensor *> &input_list,
+                  const int32_t axis,
+                  Tensor *output) {
+    const Tensor *input0 = input_list.front();
+    const int inputs_count = input_list.size() - 1;
+
+    std::vector<index_t> output_shape(input0->shape());
+    index_t inner_size = 1;
+    for (int i = 0; i < axis; ++i) {
+      inner_size *= output_shape[i];
+    }
+    std::vector<index_t> outer_sizes(inputs_count, 0);
+    outer_sizes[0] = input0->size() / inner_size;
+    for (int i = 1; i < inputs_count; ++i) {
+      const Tensor *input = input_list[i];
+      MACE_CHECK(input->dim_size() == input0->dim_size(),
+                 "Ranks of all input tensors must be same.");
+      for (int j = 0; j < input->dim_size(); ++j) {
+        if (j == axis) { continue; }
+        MACE_CHECK(input->dim(j) == input0->dim(j),
+                   "Dimensions of inputs should equal except axis.");
+      }
+      outer_sizes[i] = input->size() / inner_size;
+      output_shape[axis] += input->dim(axis);
+    }
+    output->Resize(output_shape);
+
+    T *output_ptr = output->mutable_data<T>();
+
+    std::vector<const T *> input_ptrs(input_list.size(), nullptr);
+    for (size_t i = 0; i < inputs_count; ++i) {
+      input_ptrs[i] = input_list[i]->data<T>();
+    }
+    for (int inner_idx = 0; inner_idx < inner_size; ++inner_idx) {
+      for (size_t i = 0; i < inputs_count; ++i) {
         if (DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
-          memcpy(output, input_list[i], outer_dims[i] * sizeof(T));
-          output += outer_dims[i];
-          input_list[i] += outer_dims[i];
+          memcpy(output_ptr, input_ptrs[i], outer_sizes[i] * sizeof(T));
+          output_ptr += outer_sizes[i];
+          input_ptrs[i] += outer_sizes[i];
         } else {
-          for (index_t k = 0; k < outer_dims[i]; ++k) {
-            *output++ = *input_list[i]++;
+          for (index_t k = 0; k < outer_sizes[i]; ++k) {
+            *output_ptr++ = *input_ptrs[i]++;
           }
         }
       }
@@ -34,6 +66,14 @@ struct ConcatFunctor {
   }
 };
 
+template <typename T>
+struct ConcatFunctor<DeviceType::OPENCL, T> {
+  void operator()(const std::vector<const Tensor *> &input_list,
+                  const int32_t axis,
+                  Tensor *output);
+};
+
 }  // namepsace kernels
 }  // namespace mace
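For reference, the rewritten CPU functor computes inner_size as the product of the dimensions before the concat axis and outer_sizes[i] as input[i].size() / inner_size, then interleaves one outer block per input for each inner index. Below is a minimal standalone sketch of that copy scheme using plain STL containers instead of MACE tensors; the shapes, values, and helper names are made up for illustration and are not part of MACE.

// Standalone sketch (not MACE code): concatenate a 2x3 and a 2x2 array along axis 1.
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  std::vector<float> a = {1, 2, 3, 4, 5, 6};  // shape {2, 3}
  std::vector<float> b = {7, 8, 9, 10};       // shape {2, 2}
  const int inner_size = 2;                   // product of dims before axis 1
  std::vector<const float *> in_ptrs = {a.data(), b.data()};
  const std::vector<size_t> outer_sizes = {a.size() / inner_size,   // 3
                                           b.size() / inner_size};  // 2

  std::vector<float> out(a.size() + b.size());
  float *out_ptr = out.data();
  for (int inner = 0; inner < inner_size; ++inner) {
    for (size_t i = 0; i < in_ptrs.size(); ++i) {
      // Copy one "outer" block from each input per inner index.
      std::memcpy(out_ptr, in_ptrs[i], outer_sizes[i] * sizeof(float));
      out_ptr += outer_sizes[i];
      in_ptrs[i] += outer_sizes[i];
    }
  }
  for (float v : out) std::cout << v << ' ';  // 1 2 3 7 8 4 5 6 9 10
  std::cout << '\n';
}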
mace/kernels/opencl/cl/concat.cl  (new file, mode 100644, +100 −0)

#include <common.h>

DATA_TYPE4 stitch_vector(DATA_TYPE4 left,
                         DATA_TYPE4 right,
                         const int pos,
                         const bool reversed) {
  if (!reversed) {
    switch (pos) {
      case 1:return (DATA_TYPE4)(left.x, right.x, right.y, right.z);
      case 2:return (DATA_TYPE4)(left.x, left.y, right.x, right.y);
      case 3:return (DATA_TYPE4)(left.x, left.y, left.z, right.x);
      default:return (DATA_TYPE4)0;
    }
  } else {
    switch (pos) {
      case 1:return (DATA_TYPE4)(left.w, right.x, right.y, right.z);
      case 2:return (DATA_TYPE4)(left.z, left.w, right.x, right.y);
      case 3:return (DATA_TYPE4)(left.y, left.z, left.w, right.x);
      default:return (DATA_TYPE4)0;
    }
  }
}

// Supported data type: half/float
__kernel void concat_channel(__read_only image2d_t input0,
                             __read_only image2d_t input1,
                             __private const int input0_chan,
                             __write_only image2d_t output) {
  const int chan_blk_idx = get_global_id(0);
  const int width_idx = get_global_id(1);
  const int width = get_global_size(1);
  const int hb_idx = get_global_id(2);

  const int input0_chan_blk = (input0_chan + 3) / 4;

  const sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  DATA_TYPE4 data = 0;
#ifdef DIVISIBLE_FOUR
  if (chan_blk_idx + 1 <= input0_chan_blk) {
    data = READ_IMAGET(input0, SAMPLER,
                       (int2)(chan_blk_idx * width + width_idx, hb_idx));
  } else {
    data = READ_IMAGET(input1, SAMPLER,
                       (int2)((chan_blk_idx - input0_chan_blk) * width + width_idx, hb_idx));
  }
#else
  if (chan_blk_idx + 1 < input0_chan_blk) {
    data = READ_IMAGET(input0, SAMPLER,
                       (int2)(chan_blk_idx * width + width_idx, hb_idx));
  } else if (chan_blk_idx >= input0_chan_blk) {
    const int in_chan_idx = chan_blk_idx - input0_chan_blk;
    DATA_TYPE4 data0 = READ_IMAGET(input1, SAMPLER,
                                   (int2)(in_chan_idx * width + width_idx, hb_idx));
    DATA_TYPE4 data1 = READ_IMAGET(input1, SAMPLER,
                                   (int2)((in_chan_idx + 1) * width + width_idx, hb_idx));
    data = stitch_vector(data0, data1, input0_chan % 4, true);
  } else {
    DATA_TYPE4 data0 = READ_IMAGET(input0, SAMPLER,
                                   (int2)(chan_blk_idx * width + width_idx, hb_idx));
    DATA_TYPE4 data1 = READ_IMAGET(input1, SAMPLER,
                                   (int2)(width_idx, hb_idx));
    data = stitch_vector(data0, data1, input0_chan % 4, false);
  }
#endif

  WRITE_IMAGET(output, (int2)(chan_blk_idx * width + width_idx, hb_idx), data);
}

//__kernel void concat_width(__read_only image2d_t input0,
//                           __read_only image2d_t input1,
//                           __private const int input0_width,
//                           __write_only image2d_t output) {
//  const int chan_blk_idx = get_global_id(0);
//  const int width_idx = get_global_id(1);
//  const int width = get_global_size(1);
//  const int hb_idx = get_global_id(2);
//
//  const sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
//
//  DATA_TYPE4 data = 0;
//  if (width_idx < input0_width) {
//    data = READ_IMAGET(input0,
//                       SAMPLER,
//                       (int2)(chan_blk_idx * width + width_idx, hb_idx));
//  } else {
//    data = READ_IMAGET(input1,
//                       SAMPLER,
//                       (int2)(chan_blk_idx * width + (width_idx - input0_width), hb_idx));
//  }
//
//  WRITE_IMAGET(output, (int2)(chan_blk_idx * width + width_idx, hb_idx), data);
//}
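To see what stitch_vector does in the unaligned case: when input0's channel count is not a multiple of 4, the output block that straddles the boundary takes the tail channels of input0 and the head channels of input1, with pos = input0_chan % 4 valid channels coming from the left block. The sketch below is a plain C++ re-implementation of the kernel's !reversed branch using std::array instead of an OpenCL vector type; the channel counts and values are made up for illustration.

// Standalone sketch (plain C++, not OpenCL): splice a boundary channel block.
#include <array>
#include <iostream>

using Vec4 = std::array<float, 4>;

// Mirrors the reversed == false branch of stitch_vector in concat.cl.
Vec4 stitch(const Vec4 &left, const Vec4 &right, int pos) {
  switch (pos) {
    case 1: return {left[0], right[0], right[1], right[2]};
    case 2: return {left[0], left[1], right[0], right[1]};
    case 3: return {left[0], left[1], left[2], right[0]};
    default: return {0, 0, 0, 0};
  }
}

int main() {
  // Suppose input0 has 6 channels (pos = 6 % 4 = 2): its last block holds
  // channels {4, 5, _, _}, so the boundary output block is completed with
  // the first two channels of input1.
  Vec4 last_of_input0 = {4, 5, 0, 0};
  Vec4 first_of_input1 = {100, 101, 102, 103};
  Vec4 out = stitch(last_of_input0, first_of_input1, 6 % 4);
  for (float v : out) std::cout << v << ' ';  // prints: 4 5 100 101
  std::cout << '\n';
}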
mace/kernels/opencl/concat.cc  (new file, mode 100644, +103 −0)

//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#include "mace/kernels/concat.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"

namespace mace {
namespace kernels {

static void Concat2(const Tensor *input0,
                    const Tensor *input1,
                    const DataType dt,
                    Tensor *output) {
  const index_t batch = output->dim(0);
  const index_t height = output->dim(1);
  const index_t width = output->dim(2);
  const index_t channel = output->dim(3);

  const int channel_blk = RoundUpDiv4(channel);

  auto runtime = OpenCLRuntime::Get();
  std::set<std::string> built_options;
  if (input0->dtype() == output->dtype()) {
    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));
    built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt));
  } else {
    built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
    built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
  }
  if (input0->dim(3) % 4 == 0) {
    built_options.emplace("-DDIVISIBLE_FOUR");
  }
  auto concat_kernel = runtime->BuildKernel("concat", "concat_channel", built_options);

  uint32_t idx = 0;
  concat_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(input0->buffer())));
  concat_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(input1->buffer())));
  concat_kernel.setArg(idx++, static_cast<int32_t>(input0->dim(3)));
  concat_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));

  const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(concat_kernel);
  uint32_t lws[3];
  lws[0] = std::min<uint32_t>(channel_blk, kwg_size);
  lws[1] = std::min<uint32_t>(width, kwg_size / lws[0]);
  lws[2] = std::min<uint32_t>(height * batch, kwg_size / (lws[0] * lws[1]));
  cl_int error = runtime->command_queue().enqueueNDRangeKernel(
      concat_kernel, cl::NullRange,
      cl::NDRange(static_cast<uint32_t>(channel_blk),
                  static_cast<uint32_t>(width),
                  static_cast<uint32_t>(height * batch)),
      cl::NDRange(lws[0], lws[1], lws[2]),
      NULL, OpenCLRuntime::Get()->GetDefaultEvent());
  MACE_CHECK(error == CL_SUCCESS);
}

template <typename T>
void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
    const std::vector<const Tensor *> &input_list,
    const int32_t axis,
    Tensor *output) {
  const int inputs_count = input_list.size() - 1;
  MACE_CHECK(inputs_count == 2 && axis == 3)
      << "Concat opencl kernel only support two elements with axis == 3";

  const Tensor *input0 = input_list[0];

  std::vector<index_t> output_shape(input0->shape());
  for (int i = 1; i < inputs_count; ++i) {
    const Tensor *input = input_list[i];
    MACE_CHECK(input->dim_size() == input0->dim_size(),
               "Ranks of all input tensors must be same.");
    for (int j = 0; j < input->dim_size(); ++j) {
      if (j == axis) { continue; }
      MACE_CHECK(input->dim(j) == input0->dim(j),
                 "Dimensions of inputs should equal except axis.");
    }
    output_shape[axis] += input->dim(axis);
  }
  std::vector<size_t> image_shape;
  CalImage2DShape(output_shape, BufferType::IN_OUT, image_shape);
  output->ResizeImage(output_shape, image_shape);

  switch (inputs_count) {
    case 2:
      Concat2(input_list[0], input_list[1], DataTypeToEnum<T>::value, output);
      break;
    default:MACE_NOT_IMPLEMENTED;
  }
};

template
struct ConcatFunctor<DeviceType::OPENCL, float>;
template
struct ConcatFunctor<DeviceType::OPENCL, half>;

}  // namespace kernels
}  // namespace mace
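As a worked example of the launch configuration in Concat2: for an output of shape (batch 1, height 32, width 32, channel 96), channel_blk = RoundUpDiv4(96) = 24, so the global NDRange is (24, 32, 32). The sketch below reproduces the local-work-size clamping with an assumed per-kernel work-group limit of 256; that limit is a made-up value here, in the real code it comes from GetKernelMaxWorkGroupSize.

// Standalone sketch (not MACE code): local work size clamping as in Concat2.
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  const uint32_t channel_blk = 24;    // RoundUpDiv4(96)
  const uint32_t width = 32;
  const uint32_t height_x_batch = 32;
  const uint32_t kwg_size = 256;      // assumed device/kernel limit

  uint32_t lws[3];
  lws[0] = std::min<uint32_t>(channel_blk, kwg_size);                           // 24
  lws[1] = std::min<uint32_t>(width, kwg_size / lws[0]);                        // min(32, 10) = 10
  lws[2] = std::min<uint32_t>(height_x_batch, kwg_size / (lws[0] * lws[1]));    // min(32, 1) = 1
  std::cout << lws[0] << ' ' << lws[1] << ' ' << lws[2] << '\n';                // 24 10 1
}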
mace/ops/concat.cc  (+14 −2)

@@ -7,8 +7,20 @@
 namespace mace {
 
 REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat")
                           .TypeConstraint<float>("T")
                           .Build(),
                       ConcatOp<DeviceType::CPU, float>);
+REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat")
+                          .TypeConstraint<half>("T")
+                          .Build(),
+                      ConcatOp<DeviceType::CPU, half>);
+REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
+                             .TypeConstraint<float>("T")
+                             .Build(),
+                         ConcatOp<DeviceType::OPENCL, float>);
+REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
+                             .TypeConstraint<half>("T")
+                             .Build(),
+                         ConcatOp<DeviceType::OPENCL, half>);
 
 }  // namespace mace
mace/ops/concat.h  (+6 −32)

@@ -17,50 +17,24 @@ class ConcatOp : public Operator<D, T> {
       : Operator<D, T>(op_def, ws) {}
 
   bool Run() override {
-    int32_t values_count = this->InputSize() - 1;
-    const Tensor *input0 = this->Input(0);
-    const Tensor *axis_tensor = this->Input(values_count);
+    const int32_t inputs_count = this->InputSize() - 1;
+    const std::vector<const Tensor *> input_list = this->Inputs();
+    const Tensor *axis_tensor = this->Input(inputs_count);
     MACE_CHECK(axis_tensor->dim_size() == 0,
                "axis should be a scalar integer, but got shape: ",
                axis_tensor->dim_size());
+    Tensor::MappingGuard axis_mapper(axis_tensor);
     const int32_t concat_axis = *(axis_tensor->data<int32_t>());
-    const int32_t input_dims = input0->dim_size();
+    const int32_t input_dims = input_list[0]->dim_size();
     const int32_t axis =
         concat_axis < 0 ? concat_axis + input_dims : concat_axis;
     MACE_CHECK((0 <= axis && axis < input_dims),
                "Expected concatenating axis in the range [", -input_dims, ", ",
                input_dims, "], but got", concat_axis);
-    std::vector<index_t> output_shape(input0->shape());
-    index_t inner_size = 1;
-    for (int i = 0; i < axis; ++i) {
-      inner_size *= output_shape[i];
-    }
-    std::vector<index_t> outer_sizes(values_count, 0);
-    std::vector<const T *> input_list(values_count, nullptr);
-    input_list[0] = input0->data<T>();
-    outer_sizes[0] = input0->size() / inner_size;
-    const Tensor *input = nullptr;
-    for (int i = 1; i < values_count; ++i) {
-      input = this->Input(i);
-      MACE_CHECK(input->dim_size() == input0->dim_size(),
-                 "Ranks of all input tensors must be same.");
-      for (int j = 0; j < axis_tensor->dim_size(); ++j) {
-        if (j == axis) { continue; }
-        MACE_CHECK(input->dim(j) == input0->dim(j),
-                   "Dimensions of inputs should equal except axis.");
-      }
-      input_list[i] = input->data<T>();
-      outer_sizes[i] = input->size() / inner_size;
-      output_shape[axis] += input->dim(axis);
-    }
 
     Tensor *output = this->Output(OUTPUT);
-    output->Resize(output_shape);
 
-    functor_(input_list, inner_size, outer_sizes.data(),
-             output->mutable_data<T>());
+    functor_(input_list, axis, output);
 
     return true;
   }
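The op itself now only normalises the axis (Python-style negative indices folded into [0, input_dims)) and hands the tensor list to the functor, which does its own shape inference and resize. A small standalone sketch of that normalisation, with a made-up rank and axis values:

// Standalone sketch (not MACE code): negative-axis normalisation.
#include <iostream>

int main() {
  const int input_dims = 4;  // e.g. an NHWC tensor
  for (int concat_axis : {3, -1}) {
    const int axis = concat_axis < 0 ? concat_axis + input_dims : concat_axis;
    std::cout << concat_axis << " -> " << axis << '\n';  // both resolve to 3
  }
}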
mace/ops/concat_benchmark.cc  (+56 −4)

@@ -38,14 +38,66 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
   }
 }
 
-static void BM_ConcatDim0Float(int iters, int dim1) {
+static void BM_CONCAT_Dim0Float(int iters, int dim1) {
   ConcatHelper<DeviceType::CPU, float>(iters, 0, dim1);
 }
 
-static void BM_ConcatDim1Float(int iters, int dim1) {
+static void BM_CONCAT_Dim1Float(int iters, int dim1) {
   ConcatHelper<DeviceType::CPU, float>(iters, 1, dim1);
 }
 
-BENCHMARK(BM_ConcatDim0Float)->Arg(1000)->Arg(100000);
-BENCHMARK(BM_ConcatDim1Float)->Arg(1000)->Arg(100000);
+BENCHMARK(BM_CONCAT_Dim0Float)->Arg(1000)->Arg(100000);
+BENCHMARK(BM_CONCAT_Dim1Float)->Arg(1000)->Arg(100000);
+
+template <typename T>
+static void OpenclConcatHelper(int iters,
+                               const std::vector<index_t> &shape0,
+                               const std::vector<index_t> &shape1,
+                               int concat_dim) {
+  mace::testing::StopTiming();
+
+  OpsTestNet net;
+  // Add input data
+  net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0);
+  net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1);
+  net.AddInputFromArray<DeviceType::OPENCL, int32_t>("Axis", {}, {concat_dim});
+
+  BufferToImage<DeviceType::OPENCL, T>(net, "Input0", "InputImage0",
+                                       kernels::BufferType::IN_OUT);
+  BufferToImage<DeviceType::OPENCL, T>(net, "Input1", "InputImage1",
+                                       kernels::BufferType::IN_OUT);
+  OpDefBuilder("Concat", "ConcatBM")
+      .Input("InputImage0")
+      .Input("InputImage1")
+      .Input("Axis")
+      .Output("OutputImage")
+      .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
+      .Finalize(net.NewOperatorDef());
+
+  // Warm-up
+  for (int i = 0; i < 5; ++i) {
+    net.RunOp(DeviceType::OPENCL);
+  }
+
+  const int64_t tot =
+      static_cast<int64_t>(iters) *
+      (net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size());
+  mace::testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(T));
+  mace::testing::StartTiming();
+  while (iters--) {
+    net.RunOp(DeviceType::OPENCL);
+  }
+}
+
+static void BM_CONCATOPENCLFloat(int iters, int dim1) {
+  std::vector<index_t> shape = {3, 32, 32, dim1};
+  OpenclConcatHelper<float>(iters, shape, shape, 3);
+}
+
+static void BM_CONCATOPENCLHalf(int iters, int dim1) {
+  std::vector<index_t> shape = {3, 32, 32, dim1};
+  OpenclConcatHelper<half>(iters, shape, shape, 3);
+}
+
+BENCHMARK(BM_CONCATOPENCLFloat)->Arg(32)->Arg(64)->Arg(128)->Arg(256);
+BENCHMARK(BM_CONCATOPENCLHalf)->Arg(32)->Arg(64)->Arg(128)->Arg(256);
 
 }  // namespace mace
\ No newline at end of file
mace/ops/concat_test.cc  (+88 −3)

@@ -10,7 +10,7 @@ using namespace mace;
 
 class ConcatOpTest : public OpsTestBase {};
 
-TEST_F(ConcatOpTest, Simple_Horizon) {
+TEST_F(ConcatOpTest, CPUSimpleHorizon) {
   // Construct graph
   auto &net = test_net();
   OpDefBuilder("Concat", "ConcatTest")
@@ -48,7 +48,7 @@ TEST_F(ConcatOpTest, Simple_Horizon) {
   }
 }
 
-TEST_F(ConcatOpTest, Simple_Vertical) {
+TEST_F(ConcatOpTest, CPUSimpleVertical) {
   // Construct graph
   auto &net = test_net();
   OpDefBuilder("Concat", "ConcatTest")
@@ -88,7 +88,7 @@ TEST_F(ConcatOpTest, Simple_Vertical) {
   }
 }
 
-TEST_F(ConcatOpTest, Random) {
+TEST_F(ConcatOpTest, CPURandom) {
   srand(time(nullptr));
   int dim = 5;
   int num_inputs = 2 + rand() % 10;
@@ -139,3 +139,88 @@ TEST_F(ConcatOpTest, Random) {
     }
   }
 }
+
+template <typename T>
+void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
+                      const int axis) {
+  srand(time(nullptr));
+  int num_inputs = 2;
+  int concat_axis_size = 0;
+  // Construct graph
+  OpsTestNet net;
+  for (int i = 0; i < num_inputs; ++i) {
+    const std::string input_name = ("Input" + ToString(i)).c_str();
+    const std::string image_name = ("InputImage" + ToString(i)).c_str();
+    concat_axis_size += shapes[i][axis];
+    net.AddRandomInput<DeviceType::OPENCL, float>(input_name, shapes[i]);
+    BufferToImage<DeviceType::OPENCL, T>(net, input_name, image_name,
+                                         kernels::BufferType::IN_OUT);
+  }
+
+  net.AddInputFromArray<DeviceType::OPENCL, int>("Axis", {}, {axis});
+
+  auto builder = OpDefBuilder("Concat", "ConcatTest");
+  for (int i = 0; i < num_inputs; ++i) {
+    const std::string image_name = ("InputImage" + ToString(i)).c_str();
+    builder = builder.Input(image_name);
+  }
+  builder.Input("Axis")
+      .Output("OutputImage")
+      .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
+      .Finalize(net.NewOperatorDef());
+
+  // Run
+  net.RunOp(DeviceType::OPENCL);
+
+  ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output",
+                                           kernels::BufferType::IN_OUT);
+
+  // Check
+  auto output = net.GetOutput("Output");
+
+  std::vector<index_t> expected_shape = shapes[0];
+  expected_shape[axis] = concat_axis_size;
+  EXPECT_THAT(output->shape(), ::testing::ContainerEq(expected_shape));
+
+  Tensor::MappingGuard output_mapper(output);
+  const float *output_ptr = output->data<float>();
+  int k = 0;
+  while (output_ptr != (output->data<float>() + output->size())) {
+    for (int i = 0; i < num_inputs; ++i) {
+      index_t num_elements =
+          std::accumulate(shapes[i].begin() + axis, shapes[i].end(), 1,
+                          std::multiplies<index_t>());
+      const std::string input_name = ("Input" + ToString(i)).c_str();
+      const Tensor *input_tensor = net.GetTensor(input_name.data());
+      Tensor::MappingGuard input_guard(input_tensor);
+      const float *input_ptr = input_tensor->data<float>() + k * num_elements;
+      for (int j = 0; j < num_elements; ++j) {
+        EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2)
+            << "With index: " << i << ", " << j;
+      }
+    }
+    k++;
+  }
+}
+
+TEST_F(ConcatOpTest, OPENCLAligned) {
+  OpenclRandomTest<float>({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3);
+}
+
+TEST_F(ConcatOpTest, OPENCLHalfAligned) {
+  OpenclRandomTest<half>({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3);
+}
+
+TEST_F(ConcatOpTest, OPENCLUnAligned) {
+  OpenclRandomTest<float>({{3, 32, 32, 13}, {3, 32, 32, 17}}, 3);
+}