Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
13a7f090
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
13a7f090
编写于
10月 11, 2018
作者:
叶
叶剑武
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'optimize_deconv_cpu' into 'master'
fix deconv bias add See merge request !826
上级
1d40ca50
f1c57df7
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
371 addition
and
214 deletion
+371
-214
mace/kernels/deconv_2d.h
mace/kernels/deconv_2d.h
+34
-34
mace/kernels/opencl/deconv_2d.cc
mace/kernels/opencl/deconv_2d.cc
+10
-14
mace/ops/deconv_2d.h
mace/ops/deconv_2d.h
+20
-9
mace/ops/deconv_2d_benchmark.cc
mace/ops/deconv_2d_benchmark.cc
+9
-2
mace/ops/deconv_2d_test.cc
mace/ops/deconv_2d_test.cc
+262
-146
mace/python/tools/converter_tool/base_converter.py
mace/python/tools/converter_tool/base_converter.py
+6
-0
mace/python/tools/converter_tool/caffe_converter.py
mace/python/tools/converter_tool/caffe_converter.py
+5
-0
mace/python/tools/converter_tool/tensorflow_converter.py
mace/python/tools/converter_tool/tensorflow_converter.py
+12
-7
mace/python/tools/converter_tool/transformer.py
mace/python/tools/converter_tool/transformer.py
+13
-2
未找到文件。
mace/kernels/deconv_2d.h
浏览文件 @
13a7f090
...
...
@@ -34,19 +34,24 @@
namespace
mace
{
namespace
kernels
{
enum
FrameworkType
{
TENSORFLOW
=
0
,
CAFFE
=
1
,
};
struct
Deconv2dFunctorBase
:
OpKernel
{
Deconv2dFunctorBase
(
OpKernelContext
*
context
,
const
std
::
vector
<
int
>
&
strides
,
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
output_sha
pe
,
const
FrameworkType
model_ty
pe
,
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
OpKernel
(
context
),
strides_
(
strides
),
padding_type_
(
padding_type
),
paddings_
(
paddings
),
output_shape_
(
output_sha
pe
),
model_type_
(
model_ty
pe
),
activation_
(
activation
),
relux_max_limit_
(
relux_max_limit
)
{}
...
...
@@ -156,7 +161,7 @@ struct Deconv2dFunctorBase : OpKernel {
std
::
vector
<
int
>
strides_
;
// [stride_h, stride_w]
const
Padding
padding_type_
;
std
::
vector
<
int
>
paddings_
;
std
::
vector
<
index_t
>
output_sha
pe_
;
const
FrameworkType
model_ty
pe_
;
const
ActivationType
activation_
;
const
float
relux_max_limit_
;
};
...
...
@@ -171,14 +176,14 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
const
std
::
vector
<
int
>
&
strides
,
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
output_sha
pe
,
const
FrameworkType
model_ty
pe
,
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
Deconv2dFunctorBase
(
context
,
strides
,
padding_type
,
paddings
,
output_sha
pe
,
model_ty
pe
,
activation
,
relux_max_limit
)
{}
...
...
@@ -277,19 +282,16 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
std
::
vector
<
int
>
paddings
(
2
);
std
::
vector
<
int
>
out_paddings
(
2
);
std
::
vector
<
index_t
>
output_shape
(
4
);
if
(
paddings_
.
empty
()
)
{
// tensorflow
if
(
model_type_
==
FrameworkType
::
TENSORFLOW
)
{
// tensorflow
paddings
=
std
::
vector
<
int
>
(
2
,
0
);
if
(
output_shape_
.
size
()
==
4
)
{
output_shape
=
output_shape_
;
}
else
{
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
auto
output_shape_data
=
output_shape_tensor
->
data
<
int32_t
>
();
output_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
}
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
auto
output_shape_data
=
output_shape_tensor
->
data
<
int32_t
>
();
output_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
const
index_t
t
=
output_shape
[
1
];
output_shape
[
1
]
=
output_shape
[
3
];
output_shape
[
3
]
=
output_shape
[
2
];
...
...
@@ -437,21 +439,6 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
padded_out_h
==
output_shape
[
2
]
&&
padded_out_w
==
output_shape
[
3
];
float
*
out_data
=
no_pad
?
output_data
:
padded_out_data
;
if
(
bias_data
!=
nullptr
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
channels
=
output_shape
[
1
];
const
index_t
img_size
=
output_shape
[
2
]
*
output_shape
[
3
];
#pragma omp parallel for collapse(3)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
index_t
i
=
0
;
i
<
img_size
;
++
i
)
{
output_data
[(
b
*
channels
+
c
)
*
img_size
+
i
]
+=
bias_data
[
c
];
}
}
}
}
deconv_func
(
input_data
,
filter_data
,
in_shape
,
...
...
@@ -466,7 +453,20 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
output_data
);
}
if
(
bias_data
!=
nullptr
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
channels
=
output_shape
[
1
];
const
index_t
img_size
=
output_shape
[
2
]
*
output_shape
[
3
];
#pragma omp parallel for collapse(3)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
index_t
i
=
0
;
i
<
img_size
;
++
i
)
{
output_data
[(
b
*
channels
+
c
)
*
img_size
+
i
]
+=
bias_data
[
c
];
}
}
}
}
DoActivation
<
float
>
(
output_data
,
output_data
,
...
...
@@ -501,7 +501,7 @@ struct Deconv2dFunctor<DeviceType::GPU, T> : Deconv2dFunctorBase {
const
std
::
vector
<
int
>
&
strides
,
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
output_sha
pe
,
const
FrameworkType
model_ty
pe
,
const
ActivationType
activation
,
const
float
relux_max_limit
);
...
...
mace/kernels/opencl/deconv_2d.cc
浏览文件 @
13a7f090
...
...
@@ -24,14 +24,14 @@ Deconv2dFunctor<DeviceType::GPU, T>::Deconv2dFunctor(
const
std
::
vector
<
int
>
&
strides
,
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
output_sha
pe
,
const
FrameworkType
model_ty
pe
,
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
Deconv2dFunctorBase
(
context
,
strides
,
padding_type
,
paddings
,
output_sha
pe
,
model_ty
pe
,
activation
,
relux_max_limit
)
{
if
(
context
->
device
()
->
opencl_runtime
()
->
UseImageMemory
())
{
...
...
@@ -55,19 +55,15 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()(
std
::
vector
<
int
>
paddings
(
2
);
std
::
vector
<
int
>
out_paddings
(
2
);
std
::
vector
<
index_t
>
output_shape
(
4
);
if
(
paddings_
.
empty
()
)
{
if
(
model_type_
==
FrameworkType
::
TENSORFLOW
)
{
paddings
=
std
::
vector
<
int
>
(
2
,
0
);
if
(
output_shape_
.
size
()
!=
4
)
{
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
auto
output_shape_data
=
output_shape_tensor
->
data
<
int32_t
>
();
output_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
}
else
{
output_shape
=
output_shape_
;
}
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
auto
output_shape_data
=
output_shape_tensor
->
data
<
int32_t
>
();
output_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
CalcDeconvPaddingAndInputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
.
data
(),
...
...
mace/ops/deconv_2d.h
浏览文件 @
13a7f090
...
...
@@ -34,28 +34,39 @@ class Deconv2dOp : public Operator<D, T> {
static_cast
<
Padding
>
(
OperatorBase
::
GetOptionalArg
<
int
>
(
"padding"
,
static_cast
<
int
>
(
SAME
))),
OperatorBase
::
GetRepeatedArgs
<
int
>
(
"padding_values"
),
OperatorBase
::
GetRepeatedArgs
<
index_t
>
(
"output_shape"
),
static_cast
<
kernels
::
FrameworkType
>
(
OperatorBase
::
GetOptionalArg
<
int
>
(
"framework_type"
,
0
)),
kernels
::
StringToActivationType
(
OperatorBase
::
GetOptionalArg
<
std
::
string
>
(
"activation"
,
"NOOP"
)),
OperatorBase
::
GetOptionalArg
<
float
>
(
"max_limit"
,
0.0
f
))
{}
MaceStatus
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
const
Tensor
*
filter
=
this
->
Input
(
FILTER
);
const
Tensor
*
output_shape
=
this
->
InputSize
()
>=
3
?
this
->
Input
(
OUTPUT_SHAPE
)
:
nullptr
;
const
Tensor
*
bias
=
this
->
InputSize
()
>=
4
?
this
->
Input
(
BIAS
)
:
nullptr
;
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
MACE_CHECK
(
this
->
InputSize
()
>=
2
,
"deconv needs >= 2 inputs."
);
const
Tensor
*
input
=
this
->
Input
(
0
);
const
Tensor
*
filter
=
this
->
Input
(
1
);
const
kernels
::
FrameworkType
model_type
=
static_cast
<
kernels
::
FrameworkType
>
(
OperatorBase
::
GetOptionalArg
<
int
>
(
"framework_type"
,
0
));
if
(
model_type
==
kernels
::
CAFFE
)
{
const
Tensor
*
bias
=
this
->
InputSize
()
>=
3
?
this
->
Input
(
2
)
:
nullptr
;
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
return
functor_
(
input
,
filter
,
bias
,
output_shape
,
output
,
future
);
return
functor_
(
input
,
filter
,
bias
,
nullptr
,
output
,
future
);
}
else
{
const
Tensor
*
output_shape
=
this
->
InputSize
()
>=
3
?
this
->
Input
(
2
)
:
nullptr
;
const
Tensor
*
bias
=
this
->
InputSize
()
>=
4
?
this
->
Input
(
3
)
:
nullptr
;
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
return
functor_
(
input
,
filter
,
bias
,
output_shape
,
output
,
future
);
}
}
private:
kernels
::
Deconv2dFunctor
<
D
,
T
>
functor_
;
protected:
MACE_OP_INPUT_TAGS
(
INPUT
,
FILTER
,
OUTPUT_SHAPE
,
BIAS
);
MACE_OP_OUTPUT_TAGS
(
OUTPUT
);
};
...
...
mace/ops/deconv_2d_benchmark.cc
浏览文件 @
13a7f090
...
...
@@ -49,28 +49,35 @@ static void Deconv2d(int iters,
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
});
net
.
AddInputFromArray
<
D
,
int32_t
>
(
"OutputShape"
,
{
4
},
{
batch
,
out_h
,
out_w
,
output_channels
});
if
(
D
==
DeviceType
::
GPU
)
{
BufferToImage
<
D
,
T
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
&
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"OutputShape"
)
.
Input
(
"BiasImage"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"output_shape"
,
{
batch
,
out_h
,
out_w
,
output_channels
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"OutputShape"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"output_shape"
,
{
batch
,
out_h
,
out_w
,
output_channels
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
...
...
mace/ops/deconv_2d_test.cc
浏览文件 @
13a7f090
...
...
@@ -28,34 +28,58 @@ namespace {
template
<
DeviceType
D
>
void
RunTestSimple
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
float
>
&
bias_data
,
const
int
stride
,
Padding
padding
,
const
std
::
vector
<
int
>
&
padding_size
,
const
std
::
vector
<
int
>
&
output_shape
,
const
std
::
vector
<
int
32_t
>
&
output_shape
,
const
std
::
vector
<
index_t
>
&
filter_shape
,
const
std
::
vector
<
float
>
&
filter_data
,
const
std
::
vector
<
index_t
>
&
expected_shape
,
const
std
::
vector
<
float
>
&
expected_data
)
{
const
std
::
vector
<
float
>
&
expected_data
,
kernels
::
FrameworkType
model_type
)
{
OpsTestNet
net
;
// Add input data
const
index_t
batch
=
input_shape
[
0
];
const
index_t
out_channels
=
filter_shape
[
2
];
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
filter_shape
,
filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
out_channels
},
bias_data
);
net
.
TransformDataFormat
<
D
,
float
>
(
"Filter"
,
HWOI
,
"FilterOIHW"
,
OIHW
);
if
(
D
==
DeviceType
::
GPU
)
{
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
&
net
,
"FilterOIHW"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
Finalize
(
net
.
NewOperatorDef
());
if
(
model_type
==
kernels
::
FrameworkType
::
CAFFE
)
{
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
net
.
AddInputFromArray
<
D
,
int32_t
>
(
"OutputShape"
,
{
4
},
output_shape
);
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"OutputShape"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
Finalize
(
net
.
NewOperatorDef
());
}
net
.
RunOp
(
D
);
// Transfer output
...
...
@@ -64,15 +88,34 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
}
else
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"FilterOIHW"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
Finalize
(
net
.
NewOperatorDef
());
if
(
model_type
==
kernels
::
FrameworkType
::
CAFFE
)
{
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"FilterOIHW"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
net
.
AddInputFromArray
<
D
,
int32_t
>
(
"OutputShape"
,
{
4
},
output_shape
);
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"FilterOIHW"
)
.
Input
(
"OutputShape"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
Finalize
(
net
.
NewOperatorDef
());
}
// Run
net
.
RunOp
(
D
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
...
...
@@ -85,144 +128,186 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
template
<
DeviceType
D
>
void
TestNHWCSimple3x3SAME_S1
()
{
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
1
,
Padding
::
SAME
,
{},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0.5
,
0.6
,
0.7
},
1
,
Padding
::
SAME
,
{},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
3
,
3
,
3
},
{
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
9
,
9
,
9
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
});
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
1
,
Padding
::
VALID
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
3
,
3
,
3
},
{
4.5
,
4.6
,
4.7
,
6.5
,
6.6
,
6.7
,
4.5
,
4.6
,
4.7
,
6.5
,
6.6
,
6.7
,
9.5
,
9.6
,
9.7
,
6.5
,
6.6
,
6.7
,
4.5
,
4.6
,
4.7
,
6.5
,
6.6
,
6.7
,
4.5
,
4.6
,
4.7
},
kernels
::
FrameworkType
::
TENSORFLOW
);
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0
,
0
,
0
},
1
,
Padding
::
VALID
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
3
,
3
,
3
},
{
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
9
,
9
,
9
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
});
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
1
,
Padding
::
SAME
,
{},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
3
,
3
,
3
},
{
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
9
,
9
,
9
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
},
kernels
::
FrameworkType
::
CAFFE
);
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
{
0
,
0
,
0
},
1
,
Padding
::
SAME
,
{},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
3
,
3
,
3
},
{
54
,
66
,
78
,
126
,
147
,
168
,
130
,
146
,
162
,
198
,
225
,
252
,
405
,
450
,
495
,
366
,
399
,
432
,
354
,
378
,
402
,
630
,
669
,
708
,
502
,
530
,
558
});
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
1
,
Padding
::
SAME
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
{
1
,
3
,
3
,
3
},
{
54
,
66
,
78
,
126
,
147
,
168
,
130
,
146
,
162
,
198
,
225
,
252
,
405
,
450
,
495
,
366
,
399
,
432
,
354
,
378
,
402
,
630
,
669
,
708
,
502
,
530
,
558
},
kernels
::
FrameworkType
::
TENSORFLOW
);
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
{
0
,
0
,
0
},
1
,
Padding
::
SAME
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
3
,
3
,
3
},
{
54
,
66
,
78
,
126
,
147
,
168
,
130
,
146
,
162
,
198
,
225
,
252
,
405
,
450
,
495
,
366
,
399
,
432
,
354
,
378
,
402
,
630
,
669
,
708
,
502
,
530
,
558
});
{
1
,
3
,
3
,
3
},
{
54
,
66
,
78
,
126
,
147
,
168
,
130
,
146
,
162
,
198
,
225
,
252
,
405
,
450
,
495
,
366
,
399
,
432
,
354
,
378
,
402
,
630
,
669
,
708
,
502
,
530
,
558
},
kernels
::
FrameworkType
::
CAFFE
);
}
template
<
DeviceType
D
>
void
TestNHWCSimple3x3SAME_S2
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
2
,
Padding
::
SAME
,
{},
{
1
,
6
,
6
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
6
,
6
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
});
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
2
,
Padding
::
SAME
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
5
,
5
,
3
},
{
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
});
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
2
,
Padding
::
SAME
,
{},
{
1
,
6
,
6
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
6
,
6
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
9
,
12
,
15
,
8
,
10
,
12
,
17
,
22
,
27
,
12
,
15
,
18
,
10
,
11
,
12
,
13
,
14
,
15
,
36
,
39
,
42
,
26
,
28
,
30
,
62
,
67
,
72
,
39
,
42
,
45
,
23
,
28
,
33
,
38
,
43
,
48
,
96
,
108
,
120
,
64
,
71
,
78
,
148
,
164
,
180
,
90
,
99
,
108
,
40
,
44
,
48
,
52
,
56
,
60
,
114
,
123
,
132
,
65
,
70
,
75
,
140
,
151
,
162
,
78
,
84
,
90
,
83
,
94
,
105
,
116
,
127
,
138
,
252
,
276
,
300
,
142
,
155
,
168
,
304
,
332
,
360
,
168
,
183
,
198
,
70
,
77
,
84
,
91
,
98
,
105
,
192
,
207
,
222
,
104
,
112
,
120
,
218
,
235
,
252
,
117
,
126
,
135
});
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
2
,
Padding
::
SAME
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
5
,
5
,
3
},
{
13
,
14
,
15
,
36
,
39
,
42
,
26
,
28
,
30
,
62
,
67
,
72
,
39
,
42
,
45
,
38
,
43
,
48
,
96
,
108
,
120
,
64
,
71
,
78
,
148
,
164
,
180
,
90
,
99
,
108
,
52
,
56
,
60
,
114
,
123
,
132
,
65
,
70
,
75
,
140
,
151
,
162
,
78
,
84
,
90
,
116
,
127
,
138
,
252
,
276
,
300
,
142
,
155
,
168
,
304
,
332
,
360
,
168
,
183
,
198
,
91
,
98
,
105
,
192
,
207
,
222
,
104
,
112
,
120
,
218
,
235
,
252
,
117
,
126
,
135
});
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0
,
0
,
0
},
2
,
Padding
::
SAME
,
{},
{
1
,
6
,
6
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
6
,
6
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
},
kernels
::
FrameworkType
::
TENSORFLOW
);
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0
,
0
,
0
},
2
,
Padding
::
SAME
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
5
,
5
,
3
},
{
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
},
kernels
::
FrameworkType
::
CAFFE
);
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
{
0
,
0
,
0
},
2
,
Padding
::
SAME
,
{},
{
1
,
6
,
6
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
6
,
6
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
9
,
12
,
15
,
8
,
10
,
12
,
17
,
22
,
27
,
12
,
15
,
18
,
10
,
11
,
12
,
13
,
14
,
15
,
36
,
39
,
42
,
26
,
28
,
30
,
62
,
67
,
72
,
39
,
42
,
45
,
23
,
28
,
33
,
38
,
43
,
48
,
96
,
108
,
120
,
64
,
71
,
78
,
148
,
164
,
180
,
90
,
99
,
108
,
40
,
44
,
48
,
52
,
56
,
60
,
114
,
123
,
132
,
65
,
70
,
75
,
140
,
151
,
162
,
78
,
84
,
90
,
83
,
94
,
105
,
116
,
127
,
138
,
252
,
276
,
300
,
142
,
155
,
168
,
304
,
332
,
360
,
168
,
183
,
198
,
70
,
77
,
84
,
91
,
98
,
105
,
192
,
207
,
222
,
104
,
112
,
120
,
218
,
235
,
252
,
117
,
126
,
135
},
kernels
::
FrameworkType
::
TENSORFLOW
);
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
{
0
,
0
,
0
},
2
,
Padding
::
SAME
,
{
2
,
2
},
{
0
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
5
,
5
,
3
},
{
13
,
14
,
15
,
36
,
39
,
42
,
26
,
28
,
30
,
62
,
67
,
72
,
39
,
42
,
45
,
38
,
43
,
48
,
96
,
108
,
120
,
64
,
71
,
78
,
148
,
164
,
180
,
90
,
99
,
108
,
52
,
56
,
60
,
114
,
123
,
132
,
65
,
70
,
75
,
140
,
151
,
162
,
78
,
84
,
90
,
116
,
127
,
138
,
252
,
276
,
300
,
142
,
155
,
168
,
304
,
332
,
360
,
168
,
183
,
198
,
91
,
98
,
105
,
192
,
207
,
222
,
104
,
112
,
120
,
218
,
235
,
252
,
117
,
126
,
135
},
kernels
::
FrameworkType
::
CAFFE
);
}
template
<
DeviceType
D
>
void
TestNHWCSimple3x3SAME_S2_1
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
12
,
18
,
12
,
18
,
27
,
18
,
12
,
18
,
12
},
2
,
Padding
::
SAME
,
{},
{
1
,
5
,
5
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
5
,
5
,
3
},
{
12
,
12
,
12
,
30
,
30
,
30
,
18
,
18
,
18
,
30
,
30
,
30
,
12
,
12
,
12
,
30
,
30
,
30
,
75
,
75
,
75
,
45
,
45
,
45
,
75
,
75
,
75
,
30
,
30
,
30
,
18
,
18
,
18
,
45
,
45
,
45
,
27
,
27
,
27
,
45
,
45
,
45
,
18
,
18
,
18
,
30
,
30
,
30
,
75
,
75
,
75
,
45
,
45
,
45
,
75
,
75
,
75
,
30
,
30
,
30
,
12
,
12
,
12
,
30
,
30
,
30
,
18
,
18
,
18
,
30
,
30
,
30
,
12
,
12
,
12
});
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
12
,
18
,
12
,
18
,
27
,
18
,
12
,
18
,
12
},
{
0
,
0
,
0
},
2
,
Padding
::
SAME
,
{},
{
1
,
5
,
5
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
5
,
5
,
3
},
{
12
,
12
,
12
,
30
,
30
,
30
,
18
,
18
,
18
,
30
,
30
,
30
,
12
,
12
,
12
,
30
,
30
,
30
,
75
,
75
,
75
,
45
,
45
,
45
,
75
,
75
,
75
,
30
,
30
,
30
,
18
,
18
,
18
,
45
,
45
,
45
,
27
,
27
,
27
,
45
,
45
,
45
,
18
,
18
,
18
,
30
,
30
,
30
,
75
,
75
,
75
,
45
,
45
,
45
,
75
,
75
,
75
,
30
,
30
,
30
,
12
,
12
,
12
,
30
,
30
,
30
,
18
,
18
,
18
,
30
,
30
,
30
,
12
,
12
,
12
},
kernels
::
FrameworkType
::
TENSORFLOW
);
}
template
<
DeviceType
D
>
void
TestNHWCSimple3x3VALID_S2
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
2
,
Padding
::
VALID
,
{},
{
1
,
7
,
7
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
7
,
7
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
});
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0
,
0
,
0
},
2
,
Padding
::
VALID
,
{},
{
1
,
7
,
7
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
7
,
7
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
},
kernels
::
FrameworkType
::
TENSORFLOW
);
}
template
<
DeviceType
D
>
void
TestNHWCSimple3x3VALID_S1
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
1
,
Padding
::
VALID
,
{},
{
1
,
5
,
5
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
5
,
5
,
3
},
{
1
,
2
,
3
,
6
,
9
,
12
,
18
,
24
,
30
,
26
,
31
,
36
,
21
,
24
,
27
,
14
,
19
,
24
,
54
,
66
,
78
,
126
,
147
,
168
,
130
,
146
,
162
,
90
,
99
,
108
,
66
,
78
,
90
,
198
,
225
,
252
,
405
,
450
,
495
,
366
,
399
,
432
,
234
,
252
,
270
,
146
,
157
,
168
,
354
,
378
,
402
,
630
,
669
,
708
,
502
,
530
,
558
,
294
,
309
,
324
,
133
,
140
,
147
,
306
,
321
,
336
,
522
,
546
,
570
,
398
,
415
,
432
,
225
,
234
,
243
});
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
{
0
,
0
,
0
},
1
,
Padding
::
VALID
,
{},
{
1
,
5
,
5
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
5
,
5
,
3
},
{
1
,
2
,
3
,
6
,
9
,
12
,
18
,
24
,
30
,
26
,
31
,
36
,
21
,
24
,
27
,
14
,
19
,
24
,
54
,
66
,
78
,
126
,
147
,
168
,
130
,
146
,
162
,
90
,
99
,
108
,
66
,
78
,
90
,
198
,
225
,
252
,
405
,
450
,
495
,
366
,
399
,
432
,
234
,
252
,
270
,
146
,
157
,
168
,
354
,
378
,
402
,
630
,
669
,
708
,
502
,
530
,
558
,
294
,
309
,
324
,
133
,
140
,
147
,
306
,
321
,
336
,
522
,
546
,
570
,
398
,
415
,
432
,
225
,
234
,
243
},
kernels
::
FrameworkType
::
TENSORFLOW
);
}
template
<
DeviceType
D
>
void
TestNHWCSimple2x2SAME
()
{
RunTestSimple
<
D
>
({
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
1
,
Padding
::
SAME
,
{},
RunTestSimple
<
D
>
({
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
{
0
},
1
,
Padding
::
SAME
,
{},
{
1
,
2
,
2
,
1
},
{
3
,
3
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
{
1
,
2
,
2
,
1
},
{
4.
f
,
4.
f
,
4.
f
,
4.
f
});
{
1
,
2
,
2
,
1
},
{
4.
f
,
4.
f
,
4.
f
,
4.
f
},
kernels
::
FrameworkType
::
TENSORFLOW
);
}
template
<
DeviceType
D
>
void
TestNHWCSimple2x2VALID
()
{
RunTestSimple
<
D
>
(
{
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
2
,
Padding
::
VALID
,
{},
{
1
,
5
,
5
,
1
},
{
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
{
0
},
2
,
Padding
::
VALID
,
{},
{
1
,
5
,
5
,
1
},
{
3
,
3
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
{
1
,
5
,
5
,
1
},
{
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
,
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
,
2.
f
,
2.
f
,
4.
f
,
2.
f
,
2.
f
,
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
,
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
});
2.
f
,
2.
f
,
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
,
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
},
kernels
::
FrameworkType
::
TENSORFLOW
);
}
}
// namespace
...
...
@@ -311,7 +396,11 @@ void TestComplexDeconvNxNS12(const int batch,
std
::
vector
<
int
>
paddings
;
std
::
vector
<
int
>
output_shape
;
if
(
padding
<
0
)
{
kernels
::
FrameworkType
model_type
=
padding
<
0
?
kernels
::
FrameworkType
::
TENSORFLOW
:
kernels
::
FrameworkType
::
CAFFE
;
if
(
model_type
==
kernels
::
FrameworkType
::
TENSORFLOW
)
{
if
(
type
==
Padding
::
SAME
)
{
out_h
=
(
height
-
1
)
*
stride_h
+
1
;
out_w
=
(
width
-
1
)
*
stride_w
+
1
;
...
...
@@ -323,24 +412,38 @@ void TestComplexDeconvNxNS12(const int batch,
output_shape
.
push_back
(
out_h
);
output_shape
.
push_back
(
out_w
);
output_shape
.
push_back
(
output_channels
);
net
.
AddInputFromArray
<
D
,
int32_t
>
(
"OutputShape"
,
{
4
},
output_shape
);
}
else
{
// out_h = (height - 1) * stride + 1 + padding - kernel_h + 1;
// out_w = (width -1) * stride + 1 + padding - kernel_w + 1;
paddings
.
push_back
(
padding
);
paddings
.
push_back
(
padding
);
}
// Construct graph
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
if
(
model_type
==
kernels
::
FrameworkType
::
CAFFE
)
{
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"Filter"
)
.
Input
(
"OutputShape"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
// run on cpu
net
.
RunOp
();
...
...
@@ -360,17 +463,30 @@ void TestComplexDeconvNxNS12(const int batch,
BufferToImage
<
D
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
if
(
model_type
==
kernels
::
FrameworkType
::
CAFFE
)
{
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"OutputShape"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntArg
(
"framework_type"
,
model_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
// Run on device
net
.
RunOp
(
D
);
...
...
mace/python/tools/converter_tool/base_converter.py
浏览文件 @
13a7f090
...
...
@@ -70,6 +70,11 @@ class EltwiseType(Enum):
EQUAL
=
10
class
FrameworkType
(
Enum
):
TENSORFLOW
=
0
CAFFE
=
1
MaceSupportedOps
=
[
'Activation'
,
'AddN'
,
...
...
@@ -176,6 +181,7 @@ class MaceKeyword(object):
mace_seperate_buffer_str
=
'seperate_buffer'
mace_scalar_input_index_str
=
'scalar_input_index'
mace_opencl_mem_type
=
"opencl_mem_type"
mace_framework_type_str
=
"framework_type"
class
TransformerRule
(
Enum
):
...
...
mace/python/tools/converter_tool/caffe_converter.py
浏览文件 @
13a7f090
...
...
@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool import shape_inference
from
mace.python.tools.converter_tool.base_converter
import
PoolingType
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
FrameworkType
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
...
...
@@ -351,6 +352,10 @@ class CaffeConverter(base_converter.ConverterInterface):
data_type_arg
.
name
=
'T'
data_type_arg
.
i
=
self
.
_option
.
data_type
framework_type_arg
=
op
.
arg
.
add
()
framework_type_arg
.
name
=
MaceKeyword
.
mace_framework_type_str
framework_type_arg
.
i
=
FrameworkType
.
CAFFE
.
value
ConverterUtil
.
add_data_format_arg
(
op
,
DataFormat
.
NCHW
)
return
op
...
...
mace/python/tools/converter_tool/tensorflow_converter.py
浏览文件 @
13a7f090
...
...
@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import PoolingType
from
mace.python.tools.converter_tool.base_converter
import
PaddingMode
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
FrameworkType
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
...
...
@@ -372,6 +373,10 @@ class TensorflowConverter(base_converter.ConverterInterface):
except
ValueError
:
data_type_arg
.
i
=
self
.
_option
.
data_type
framework_type_arg
=
op
.
arg
.
add
()
framework_type_arg
.
name
=
MaceKeyword
.
mace_framework_type_str
framework_type_arg
.
i
=
FrameworkType
.
TENSORFLOW
.
value
ConverterUtil
.
add_data_format_arg
(
op
,
DataFormat
.
NHWC
)
return
op
...
...
@@ -414,13 +419,13 @@ class TensorflowConverter(base_converter.ConverterInterface):
"deconv should have (>=) 3 inputs."
)
output_shape_arg
=
op
.
arg
.
add
()
output_shape_arg
.
name
=
MaceKeyword
.
mace_output_shape_str
if
tf_op
.
inputs
[
0
].
op
.
type
==
TFOpType
.
Const
.
name
:
output_shape_value
=
\
tf_op
.
inputs
[
0
].
eval
().
astype
(
np
.
int32
).
flat
output_shape_arg
.
ints
.
extend
(
output_shape_value
)
else
:
output_shape_value
=
{}
output_shape_arg
.
ints
.
extend
(
output_shape_value
)
#
if tf_op.inputs[0].op.type == TFOpType.Const.name:
#
output_shape_value = \
#
tf_op.inputs[0].eval().astype(np.int32).flat
#
output_shape_arg.ints.extend(output_shape_value)
#
else:
#
output_shape_value = {}
#
output_shape_arg.ints.extend(output_shape_value)
del
op
.
input
[:]
op
.
input
.
extend
([
tf_op
.
inputs
[
2
].
name
,
tf_op
.
inputs
[
1
].
name
,
...
...
mace/python/tools/converter_tool/transformer.py
浏览文件 @
13a7f090
...
...
@@ -26,6 +26,7 @@ from mace.python.tools.converter_tool.base_converter import ConverterUtil
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
DeviceType
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
FrameworkType
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
...
...
@@ -810,12 +811,22 @@ class Transformer(base_converter.ConverterInterface):
net
=
self
.
_model
for
op
in
net
.
op
:
if
(((
op
.
type
==
MaceOp
.
Conv2D
.
name
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
or
op
.
type
==
MaceOp
.
DepthwiseConv2d
.
name
or
op
.
type
==
MaceOp
.
FullyConnected
.
name
)
and
len
(
op
.
input
)
==
2
)
or
(
op
.
type
==
MaceOp
.
WinogradInverseTransform
.
name
and
len
(
op
.
input
)
==
1
))
\
and
len
(
op
.
input
)
==
1
)
or
(
op
.
type
==
MaceOp
.
Deconv2D
.
name
and
((
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_framework_type_str
).
i
==
FrameworkType
.
CAFFE
.
value
and
len
(
op
.
input
)
==
2
)
or
(
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_framework_type_str
).
i
==
FrameworkType
.
TENSORFLOW
.
value
and
len
(
op
.
input
)
==
3
))))
\
and
len
(
self
.
_consumers
.
get
(
op
.
output
[
0
],
[]))
==
1
:
consumer_op
=
self
.
_consumers
[
op
.
output
[
0
]][
0
]
if
consumer_op
.
type
==
MaceOp
.
BiasAdd
.
name
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录