Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
as350144
Mace
提交
a9832dfb
Mace
项目概览
as350144
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
2
Star
1
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a9832dfb
编写于
12月 04, 2017
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' of v9.git.n.xiaomi.com:deep-learning/mace into bm_to_image
上级
6ed08429
ef167287
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
232 addition
and
138 deletion
+232
-138
mace/kernels/addn.h
mace/kernels/addn.h
+19
-9
mace/kernels/neon/addn_neon.cc
mace/kernels/neon/addn_neon.cc
+2
-2
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+57
-26
mace/kernels/opencl/cl/addn.cl
mace/kernels/opencl/cl/addn.cl
+28
-15
mace/ops/addn.cc
mace/ops/addn.cc
+5
-0
mace/ops/addn.h
mace/ops/addn.h
+1
-2
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+48
-26
mace/ops/addn_test.cc
mace/ops/addn_test.cc
+72
-58
未找到文件。
mace/kernels/addn.h
浏览文件 @
a9832dfb
...
...
@@ -10,15 +10,23 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
struct
AddNFunctor
{
void
operator
()(
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
)
{
struct
AddNFunctorBase
{};
template
<
DeviceType
D
,
typename
T
>
struct
AddNFunctor
:
AddNFunctorBase
{
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
)
{
output_tensor
->
ResizeLike
(
input_tensors
[
0
]);
Tensor
::
MappingGuard
output_map
(
output_tensor
);
index_t
size
=
input_tensors
[
0
]
->
size
();
T
*
output_ptr
=
output_tensor
->
mutable_data
<
T
>
();
memset
(
output_ptr
,
0
,
size
*
sizeof
(
T
));
int
n
=
input_tensors
.
size
();
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
MACE_CHECK
(
input_tensors
[
i
]
->
dim
(
0
)
==
output_tensor
->
dim
(
0
));
MACE_CHECK
(
input_tensors
[
i
]
->
dim
(
1
)
==
output_tensor
->
dim
(
1
));
MACE_CHECK
(
input_tensors
[
i
]
->
dim
(
2
)
==
output_tensor
->
dim
(
2
));
MACE_CHECK
(
input_tensors
[
i
]
->
dim
(
3
)
==
output_tensor
->
dim
(
3
));
Tensor
::
MappingGuard
input_map
(
input_tensors
[
i
]);
const
T
*
input_ptr
=
input_tensors
[
i
]
->
data
<
T
>
();
for
(
index_t
j
=
0
;
j
<
size
;
++
j
)
{
...
...
@@ -28,15 +36,17 @@ struct AddNFunctor {
}
};
template
<
>
template
<
>
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
);
const
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
);
template
<
>
void
AddNFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
std
::
vector
<
const
Tensor
*>
&
inputs
,
Tensor
*
output
);
template
<
typename
T
>
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
T
>
:
AddNFunctorBase
{
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
);
};
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_ADDN_H_
\ No newline at end of file
#endif // MACE_KERNELS_ADDN_H_
mace/kernels/neon/addn_neon.cc
浏览文件 @
a9832dfb
...
...
@@ -10,7 +10,7 @@ namespace kernels {
template
<
>
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
)
{
const
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
)
{
// TODO: neon mem copy
index_t
size
=
output_tensor
->
size
();
float
*
output_ptr
=
output_tensor
->
mutable_data
<
float
>
();
...
...
@@ -51,4 +51,4 @@ void AddNFunctor<DeviceType::NEON, float>::operator()(
};
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
}
// namespace mace
mace/kernels/opencl/addn.cc
浏览文件 @
a9832dfb
...
...
@@ -5,52 +5,83 @@
#include "mace/kernels/addn.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
static
void
Add2
(
const
Tensor
*
input0
,
const
Tensor
*
input1
,
Tensor
*
output
)
{
index_t
element_size
=
input0
->
NumElements
();
index_t
blocks
=
(
element_size
+
3
)
/
4
;
template
<
typename
T
>
static
void
AddN
(
const
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output
)
{
if
(
input_tensors
.
size
()
>
4
)
{
MACE_NOT_IMPLEMENTED
;
}
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
height
=
output
->
dim
(
1
);
const
index_t
width
=
output
->
dim
(
2
);
const
index_t
channels
=
output
->
dim
(
3
);
const
uint32_t
gws
=
blocks
;
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
width_pixels
=
channel_blocks
*
width
;
const
index_t
batch_height_pixels
=
batch
*
height
;
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
output
->
dtype
()));
auto
addn_kernel
=
runtime
->
BuildKernel
(
"addn"
,
"add2"
,
built_options
);
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DINPUT_NUM="
+
ToString
(
input_tensors
.
size
()));
auto
addn_kernel
=
runtime
->
BuildKernel
(
"addn"
,
"addn"
,
built_options
);
const
uint32_t
lws
=
runtime
->
GetKernelMaxWorkGroupSize
(
addn_kernel
);
uint32_t
idx
=
0
;
addn_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input0
->
buffer
())));
addn_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input1
->
buffer
())));
addn_kernel
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
element_size
));
addn_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
for
(
auto
input
:
input_tensors
)
{
addn_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input
->
buffer
())));
}
addn_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
buffer
())));
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
addn_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gw
s
),
cl
::
NDRange
(
lws
),
NULL
,
OpenCLRuntime
::
Get
()
->
GetDefaultEvent
());
MACE_CHECK
(
error
==
CL_SUCCESS
);
cl
::
NDRange
(
width_pixels
,
batch_height_pixel
s
),
cl
::
NDRange
(
64
,
16
),
// TODO fix this
nullptr
,
OpenCLRuntime
::
Get
()
->
GetDefaultEvent
());
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"error code: "
<<
error
;
}
template
<
>
void
AddNFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
)
{
if
(
input_tensors
.
empty
()
||
input_tensors
.
front
()
==
nullptr
)
{
return
;
}
template
<
typename
T
>
void
AddNFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
)
{
size_t
size
=
input_tensors
.
size
();
MACE_CHECK
(
size
>=
2
&&
input_tensors
[
0
]
!=
nullptr
);
const
index_t
batch
=
input_tensors
[
0
]
->
dim
(
0
);
const
index_t
height
=
input_tensors
[
0
]
->
dim
(
1
);
const
index_t
width
=
input_tensors
[
0
]
->
dim
(
2
);
const
index_t
channels
=
input_tensors
[
0
]
->
dim
(
3
);
switch
(
size
)
{
case
2
:
Add2
(
input_tensors
[
0
],
input_tensors
[
1
],
output_tensor
);
break
;
default:
MACE_NOT_IMPLEMENTED
;
for
(
int
i
=
1
;
i
<
size
;
++
i
)
{
MACE_CHECK_NOTNULL
(
input_tensors
[
i
]);
MACE_CHECK
(
batch
==
input_tensors
[
i
]
->
dim
(
0
));
MACE_CHECK
(
height
==
input_tensors
[
i
]
->
dim
(
1
));
MACE_CHECK
(
width
==
input_tensors
[
i
]
->
dim
(
2
));
MACE_CHECK
(
channels
==
input_tensors
[
i
]
->
dim
(
3
));
}
std
::
vector
<
index_t
>
output_shape
=
input_tensors
[
0
]
->
shape
();
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT
,
output_image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
AddN
<
T
>
(
input_tensors
,
output_tensor
);
};
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
}
// namespace mace
mace/kernels/opencl/cl/addn.cl
浏览文件 @
a9832dfb
#
include
<common.h>
//
Supported
data
type:
half/float
__kernel
void
add2
(
__global
const
DATA_TYPE
*input0,
__global
const
DATA_TYPE
*input1,
__private
const
int
size,
__global
DATA_TYPE
*output
)
{
int
idx
=
get_global_id
(
0
)
;
__kernel
void
addn
(
__read_only
image2d_t
input0,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
input1,
#
if
INPUT_NUM
>
2
__read_only
image2d_t
input2,
#
endif
#
if
INPUT_NUM
>
3
__read_only
image2d_t
input3,
#
endif
__write_only
image2d_t
output
)
{
const
int
w
=
get_global_id
(
0
)
;
const
int
hb
=
get_global_id
(
1
)
;
if
(
idx
+
4
>
size
)
{
for
(
; idx < size; ++idx) {
*
(
output+idx
)
=
*
(
input0+idx
)
+
*
(
input1+idx
)
;
}
}
else
{
VEC_DATA_TYPE
(
DATA_TYPE,4
)
in_data0
=
vload4
(
idx,
input0
)
;
VEC_DATA_TYPE
(
DATA_TYPE,4
)
in_data1
=
vload4
(
idx,
input1
)
;
vstore4
(
in_data0+in_data1,
idx,
output
)
;
}
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
DATA_TYPE4
in0
=
READ_IMAGET
(
input0,
sampler,
(
int2
)(
w,
hb
))
;
DATA_TYPE4
in1
=
READ_IMAGET
(
input1,
sampler,
(
int2
)(
w,
hb
))
;
DATA_TYPE4
out
=
in0
+
in1
;
#
if
INPUT_NUM
>
2
DATA_TYPE4
in2
=
READ_IMAGET
(
input2,
sampler,
(
int2
)(
w,
hb
))
;
out
=
out
+
in2
;
#
endif
#
if
INPUT_NUM
>
3
DATA_TYPE4
in3
=
READ_IMAGET
(
input3,
sampler,
(
int2
)(
w,
hb
))
;
out
=
out
+
in3
;
#
endif
WRITE_IMAGET
(
output,
(
int2
)(
w,
hb
)
,
out
)
;
}
mace/ops/addn.cc
浏览文件 @
a9832dfb
...
...
@@ -23,4 +23,9 @@ REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN")
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
half
>
);
}
// namespace mace
mace/ops/addn.h
浏览文件 @
a9832dfb
...
...
@@ -10,7 +10,7 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
AddNOp
:
public
Operator
<
D
,
T
>
{
public:
AddNOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
...
...
@@ -18,7 +18,6 @@ class AddNOp : public Operator<D, T> {
bool
Run
()
override
{
Tensor
*
output_tensor
=
this
->
outputs_
[
0
];
output_tensor
->
ResizeLike
(
this
->
inputs_
[
0
]);
int
n
=
this
->
inputs_
.
size
();
vector
<
const
Tensor
*>
inputs
(
n
,
nullptr
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
mace/ops/addn_benchmark.cc
浏览文件 @
a9832dfb
...
...
@@ -9,47 +9,69 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
AddNBenchmark
(
int
iters
,
int
n
,
int
size
)
{
static
void
AddNBenchmark
(
int
iters
,
int
inputs
,
int
n
,
int
h
,
int
w
,
int
c
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
OpDefBuilder
op_def_builder
(
"AddN"
,
"AddNBM"
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
// Add input data
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
net
.
AddRandomInput
<
D
,
float
>
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
(),
{
n
,
h
,
w
,
c
});
}
op_def_builder
.
Output
(
"Output"
).
Finalize
(
net
.
NewOperatorDef
());
// Add input data
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
(),
{
size
});
if
(
D
==
DeviceType
::
OPENCL
)
{
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
BufferToImage
<
D
,
T
>
(
net
,
internal
::
MakeString
(
"Input"
,
i
).
c_str
(),
internal
::
MakeString
(
"InputImage"
,
i
).
c_str
(),
kernels
::
BufferType
::
IN_OUT
);
}
OpDefBuilder
op_def_builder
(
"AddN"
,
"AddNBM"
);
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"InputImage"
,
i
).
c_str
());
}
op_def_builder
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
op_def_builder
(
"AddN"
,
"AddNBM"
);
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
}
op_def_builder
.
Output
(
"Output"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
net
.
RunOp
(
D
);
net
.
Sync
();
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
net
.
RunOp
(
D
);
net
.
Sync
();
}
}
#define BM_ADDN_MACRO(N, SIZE, TYPE, DEVICE) \
static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
AddNBenchmark<DEVICE, TYPE>(iters, N, SIZE); \
} \
BENCHMARK(BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE)
#define BM_ADDN(N, SIZE, TYPE) \
BM_ADDN_MACRO(N, SIZE, TYPE, CPU); \
BM_ADDN_MACRO(N, SIZE, TYPE, NEON);
BM_ADDN
(
10
,
1000
,
float
);
BM_ADDN
(
10
,
10000
,
float
);
BM_ADDN
(
100
,
1000
,
float
);
BM_ADDN
(
100
,
10000
,
float
);
}
// namespace mace
\ No newline at end of file
#define BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, DEVICE) \
static void BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
AddNBenchmark<DEVICE, TYPE>(iters, INPUTS, N, H, W, C); \
} \
BENCHMARK(BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#define BM_ADDN(INPUTS, N, H, W, C, TYPE) \
BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, CPU); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, TYPE, OPENCL);
BM_ADDN
(
2
,
1
,
240
,
240
,
256
,
float
);
// BM_ADDN(2, 1, 240, 240, 256, half);
BM_ADDN
(
4
,
1
,
240
,
240
,
256
,
float
);
// BM_ADDN(4, 1, 240, 240, 256, half);
}
// namespace mace
mace/ops/addn_test.cc
浏览文件 @
a9832dfb
...
...
@@ -9,7 +9,7 @@ namespace mace {
class
AddnOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
SimpleAdd2
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -20,30 +20,26 @@ void SimpleAdd2() {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input1"
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input2"
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input1"
,
{
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input2"
,
{
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
});
// Run
net
.
RunOp
(
D
);
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
2
,
4
,
6
,
8
,
10
,
12
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
1
},
{
2
,
4
,
6
,
8
,
10
,
12
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
AddnOpTest
,
CPUSimpleAdd2
)
{
SimpleAdd2
<
DeviceType
::
CPU
>
();
}
TEST_F
(
AddnOpTest
,
CPUSimpleAdd2
)
{
SimpleAdd2
<
DeviceType
::
CPU
>
();
}
TEST_F
(
AddnOpTest
,
NEONSimpleAdd2
)
{
SimpleAdd2
<
DeviceType
::
NEON
>
();
}
/*
TEST_F(AddnOpTest, NEONSimpleAdd2) { SimpleAdd2<DeviceType::NEON>(); }
TEST_F
(
AddnOpTest
,
OPENCLSimpleAdd2
)
{
SimpleAdd2
<
DeviceType
::
OPENCL
>
();
}
TEST_F(AddnOpTest, OPENCLSimpleAdd2) { SimpleAdd2<DeviceType::OPENCL>(); }
*/
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
SimpleAdd3
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -55,62 +51,80 @@ void SimpleAdd3() {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input1"
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input2"
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input3"
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input1"
,
{
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input2"
,
{
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input3"
,
{
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
});
// Run
net
.
RunOp
(
D
);
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
3
,
6
,
9
,
12
,
15
,
18
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
1
},
{
3
,
6
,
9
,
12
,
15
,
18
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
AddnOpTest
,
CPUSimpleAdd3
)
{
SimpleAdd3
<
DeviceType
::
CPU
>
();
}
TEST_F
(
AddnOpTest
,
CPUSimpleAdd3
)
{
SimpleAdd3
<
DeviceType
::
CPU
>
();
}
TEST_F
(
AddnOpTest
,
NEONSimpleAdd3
)
{
SimpleAdd3
<
DeviceType
::
NEON
>
();
}
/*
TEST_F(AddnOpTest, NEONSimpleAdd3) { SimpleAdd3<DeviceType::NEON>(); }
*/
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
RandomTest
()
{
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"AddN"
,
"AddNTest"
)
.
Input
(
"Input1"
)
.
Input
(
"Input2"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input1"
,
{
1
,
2
,
3
,
4
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input2"
,
{
1
,
2
,
3
,
4
});
// Check
net
.
RunOp
(
D
);
Tensor
result
;
result
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
*
net
.
GetOutput
(
"Output"
),
result
,
1e-5
);
}
TEST_F
(
AddnOpTest
,
CPURandom
)
{
RandomTest
<
DeviceType
::
CPU
>
();
testing
::
internal
::
LogToStderr
();
srand
(
time
(
NULL
));
for
(
int
round
=
0
;
round
<
10
;
++
round
)
{
// generate random input
index_t
n
=
1
+
(
rand
()
%
5
);
index_t
h
=
1
+
(
rand
()
%
100
);
index_t
w
=
1
+
(
rand
()
%
100
);
index_t
c
=
1
+
(
rand
()
%
32
);
int
input_num
=
2
+
rand
()
%
3
;
// Construct graph
OpsTestNet
net
;
auto
op_def
=
OpDefBuilder
(
"AddN"
,
"AddNTest"
);
for
(
int
i
=
0
;
i
<
input_num
;
++
i
)
{
op_def
.
Input
(
"Input"
+
ToString
(
i
));
}
op_def
.
Output
(
"Output"
).
Finalize
(
net
.
NewOperatorDef
());
// Add input data
for
(
int
i
=
0
;
i
<
input_num
;
++
i
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
+
ToString
(
i
),
{
n
,
h
,
w
,
c
});
}
// run on cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
for
(
int
i
=
0
;
i
<
input_num
;
++
i
)
{
BufferToImage
<
D
,
half
>
(
net
,
"Input"
+
ToString
(
i
),
"InputImage"
+
ToString
(
i
),
kernels
::
BufferType
::
IN_OUT
);
}
auto
op_def_cl
=
OpDefBuilder
(
"AddN"
,
"AddNTest"
);
for
(
int
i
=
0
;
i
<
input_num
;
++
i
)
{
op_def_cl
.
Input
(
"InputImage"
+
ToString
(
i
));
}
op_def_cl
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataType
::
DT_HALF
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.1
);
}
}
TEST_F
(
AddnOpTest
,
NEONRandom
)
{
RandomTest
<
DeviceType
::
NEON
>
();
}
TEST_F
(
AddnOpTest
,
OPENCLRandom
)
{
RandomTest
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
AddnOpTest
,
OPENCLRandom
)
{
RandomTest
<
DeviceType
::
OPENCL
>
();
}
}
// namespace mace
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录