Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
冰之2023
Mace
提交
b7a95857
Mace
项目概览
冰之2023
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b7a95857
编写于
12月 04, 2017
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update relu buffer to image
上级
fd284f6a
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
199 addition
and
101 deletion
+199
-101
mace/kernels/opencl/cl/relu.cl
mace/kernels/opencl/cl/relu.cl
+20
-26
mace/kernels/opencl/relu_opencl.cc
mace/kernels/opencl/relu_opencl.cc
+27
-15
mace/kernels/relu.h
mace/kernels/relu.h
+8
-4
mace/ops/relu.cc
mace/ops/relu.cc
+10
-1
mace/ops/relu.h
mace/ops/relu.h
+1
-1
mace/ops/relu_benchmark.cc
mace/ops/relu_benchmark.cc
+37
-24
mace/ops/relu_test.cc
mace/ops/relu_test.cc
+96
-30
未找到文件。
mace/kernels/opencl/cl/relu.cl
浏览文件 @
b7a95857
#
include
<common.h>
//
Supported
data
type:
half/float
__kernel
void
relu
(
__global
const
DATA_TYPE
*input,
__private
const
int
size,
__global
DATA_TYPE
*output
)
{
int
idx
=
get_global_id
(
0
)
;
__kernel
void
relu
(
__read_only
image2d_t
input,
__write_only
image2d_t
output
)
{
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
const
int
width
=
get_global_size
(
1
)
;
if
(
idx
+
4
>
size
)
{
for
(
; idx < size; ++idx) {
*
(
output+idx
)
=
fmax
(
*
(
input+idx
)
,
0
)
;
}
}
else
{
VEC_DATA_TYPE
(
DATA_TYPE,4
)
data
=
vload4
(
idx,
input
)
;
data
=
fmax
(
data,
(
VEC_DATA_TYPE
(
DATA_TYPE,4
))
0
)
;
vstore4
(
data,
idx,
output
)
;
}
const
int
pos
=
ch_blk
*
width
+
w
;
DATA_TYPE4
in
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
pos,
hb
))
;
DATA_TYPE4
out
=
fmax
(
in,
(
DATA_TYPE4
)
0
)
;
WRITE_IMAGET
(
output,
(
int2
)(
pos,
hb
)
,
out
)
;
}
__kernel
void
relux
(
__
global
const
DATA_TYPE
*
input,
__kernel
void
relux
(
__
read_only
image2d_t
input,
__private
const
DATA_TYPE
max_limit,
__private
const
int
size,
__global
DATA_TYPE
*output
)
{
int
idx
=
get_global_id
(
0
)
;
__write_only
image2d_t
output
)
{
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
const
int
width
=
get_global_size
(
1
)
;
if
(
idx
+
4
>
size
)
{
for
(
; idx < size; ++idx) {
*
(
output+idx
)
=
clamp
(
*
(
input+idx
)
,
0.0f,
max_limit
)
;
}
}
else
{
VEC_DATA_TYPE
(
DATA_TYPE,4
)
data
=
vload4
(
idx,
input
)
;
data
=
clamp
(
data,
(
VEC_DATA_TYPE
(
DATA_TYPE,4
))
0
,
(
VEC_DATA_TYPE
(
DATA_TYPE,4
))
max_limit
)
;
vstore4
(
data,
idx,
output
)
;
}
const
int
pos
=
ch_blk
*
width
+
w
;
DATA_TYPE4
in
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
pos,
hb
))
;
DATA_TYPE4
out
=
clamp
(
in,
(
DATA_TYPE4
)
0
,
(
DATA_TYPE4
)
max_limit
)
;
WRITE_IMAGET
(
output,
(
int2
)(
pos,
hb
)
,
out
)
;
}
mace/kernels/opencl/relu_opencl.cc
浏览文件 @
b7a95857
...
...
@@ -6,58 +6,70 @@
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
template
<
>
void
ReluFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
template
<
typename
T
>
void
ReluFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
)
{
index_t
element_size
=
input
->
NumElements
();
index_t
blocks
=
(
element_size
+
3
)
/
4
;
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
const
index_t
width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
const
uint32_t
gws
=
blocks
;
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
input
->
dtype
()));
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
if
(
max_limit_
<
0
)
{
auto
relu_kernel
=
runtime
->
BuildKernel
(
"relu"
,
"relu"
,
built_options
);
const
uint32_t
lws
=
runtime
->
GetKernelMaxWorkGroupSize
(
relu_kernel
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
relu_kernel
);
const
uint32_t
lws
[
3
]
=
{
1
,
kwg_size
,
1
};
uint32_t
idx
=
0
;
relu_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
relu_kernel
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
element_size
));
relu_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
relu_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
),
cl
::
NDRange
(
lws
),
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]
),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]
),
NULL
,
OpenCLRuntime
::
Get
()
->
GetDefaultEvent
());
MACE_CHECK
(
error
==
CL_SUCCESS
);
}
else
{
auto
relu_kernel
=
runtime
->
BuildKernel
(
"relu"
,
"relux"
,
built_options
);
const
uint32_t
lws
=
runtime
->
GetKernelMaxWorkGroupSize
(
relu_kernel
)
;
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
relu_kernel
);
const
uint32_t
lws
[
3
]
=
{
1
,
kwg_size
,
1
}
;
uint32_t
idx
=
0
;
relu_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
relu_kernel
.
setArg
(
idx
++
,
max_limit_
);
relu_kernel
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
element_size
));
relu_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
relu_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
),
cl
::
NDRange
(
lws
),
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]
),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]
),
NULL
,
OpenCLRuntime
::
Get
()
->
GetDefaultEvent
());
MACE_CHECK
(
error
==
CL_SUCCESS
);
}
}
template
struct
ReluFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
ReluFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/relu.h
浏览文件 @
b7a95857
...
...
@@ -33,11 +33,15 @@ struct ReluFunctor {
template
<
>
void
ReluFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
);
template
<
>
void
ReluFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
);
template
<
typename
T
>
struct
ReluFunctor
<
DeviceType
::
OPENCL
,
T
>
{
T
max_limit_
;
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
);
};
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_RELU_H_
\ No newline at end of file
#endif // MACE_KERNELS_RELU_H_
mace/ops/relu.cc
浏览文件 @
b7a95857
...
...
@@ -12,5 +12,14 @@ REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>);
REGISTER_NEON_OPERATOR
(
Relu
,
ReluOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
REGISTER_OPENCL_OPERATOR
(
Relu
,
ReluOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
half
>
);
}
// namespace mace
mace/ops/relu.h
浏览文件 @
b7a95857
...
...
@@ -16,7 +16,7 @@ class ReluOp : public Operator<D, T> {
ReluOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{
functor_
.
max_limit_
=
OperatorBase
::
GetSingleArgument
<
T
>
(
"max_limit"
,
static_cast
<
T
>
(
-
1
));
OperatorBase
::
GetSingleArgument
<
float
>
(
"max_limit"
,
static_cast
<
float
>
(
-
1
));
}
bool
Run
()
override
{
const
Tensor
*
input_tensor
=
this
->
inputs_
[
0
];
...
...
mace/ops/relu_benchmark.cc
浏览文件 @
b7a95857
...
...
@@ -9,17 +9,28 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
ReluBenchmark
(
int
iters
,
int
size
)
{
static
void
ReluBenchmark
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
OpDefBuilder
(
"Relu"
,
"ReluBM"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
size
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluBM"
)
.
Input
(
"InputImage"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluBM"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
}
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
@@ -34,21 +45,23 @@ static void ReluBenchmark(int iters, int size) {
net
.
Sync
();
}
#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \
static void BM_RELU_##SIZE##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, SIZE); \
} \
BENCHMARK(BM_RELU_##SIZE##_##TYPE##_##DEVICE)
#define BM_RELU(SIZE, TYPE) \
BM_RELU_MACRO(SIZE, TYPE, CPU); \
BM_RELU_MACRO(SIZE, TYPE, NEON);\
BM_RELU_MACRO(SIZE, TYPE, OPENCL);
BM_RELU
(
1000
,
float
);
BM_RELU
(
100000
,
float
);
BM_RELU
(
10000000
,
float
);
}
// namespace mace
\ No newline at end of file
#define BM_RELU_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE)
#define BM_RELU(N, C, H, W, TYPE) \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, NEON);\
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELU
(
1
,
1
,
512
,
512
,
float
);
BM_RELU
(
1
,
3
,
128
,
128
,
float
);
BM_RELU
(
1
,
3
,
512
,
512
,
float
);
BM_RELU
(
1
,
32
,
112
,
112
,
float
);
BM_RELU
(
1
,
64
,
256
,
256
,
float
);
}
// namespace mace
mace/ops/relu_test.cc
浏览文件 @
b7a95857
...
...
@@ -12,10 +12,6 @@ class ReluOpTest : public OpsTestBase {};
template
<
DeviceType
D
>
void
TestSimple
()
{
OpsTestNet
net
;
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
...
...
@@ -23,8 +19,28 @@ void TestSimple() {
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
// Run
net
.
RunOp
(
D
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
2
,
2
,
2
,
2
},
{
0
,
7
,
0
,
6
,
0
,
5
,
0
,
4
,
...
...
@@ -48,20 +64,36 @@ TEST_F(ReluOpTest, OPENCLSimple) {
template
<
DeviceType
D
>
void
TestUnalignedSimple
()
{
OpsTestNet
net
;
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
3
,
2
},
{
1
,
3
,
2
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
});
// Run
net
.
RunOp
(
D
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
3
,
2
},
auto
expected
=
CreateTensor
<
float
>
({
1
,
3
,
2
,
1
},
{
0
,
7
,
0
,
6
,
0
,
5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
...
...
@@ -82,11 +114,6 @@ TEST_F(ReluOpTest, OPENCLUnalignedSimple) {
template
<
DeviceType
D
>
void
TestSimpleReluX
()
{
OpsTestNet
net
;
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
AddFloatArg
(
"max_limit"
,
6
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
...
...
@@ -94,8 +121,30 @@ void TestSimpleReluX() {
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
// Run
net
.
RunOp
(
D
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddFloatArg
(
"max_limit"
,
6
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
AddFloatArg
(
"max_limit"
,
6
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
2
,
2
,
2
,
2
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
,
4
,
...
...
@@ -119,21 +168,38 @@ TEST_F(ReluOpTest, OPENCLSimpleReluX) {
template
<
DeviceType
D
>
void
TestUnalignedSimpleReluX
()
{
OpsTestNet
net
;
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
AddFloatArg
(
"max_limit"
,
6
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
1
,
7
},
{
1
,
1
,
7
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
});
// Run
net
.
RunOp
(
D
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddFloatArg
(
"max_limit"
,
6
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
AddFloatArg
(
"max_limit"
,
6
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
7
},
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
7
,
1
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录