Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
项目经理老王
Mace
提交
e9d1103b
Mace
项目概览
项目经理老王
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e9d1103b
编写于
8月 27, 2018
作者:
L
liutuo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fold shape_stridedslice_stack
上级
50666a81
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
126 addition
and
116 deletion
+126
-116
mace/kernels/deconv_2d.h
mace/kernels/deconv_2d.h
+36
-42
mace/kernels/opencl/cl/deconv_2d.cl
mace/kernels/opencl/cl/deconv_2d.cl
+21
-19
mace/kernels/opencl/deconv_2d.cc
mace/kernels/opencl/deconv_2d.cc
+39
-25
mace/ops/deconv_2d.h
mace/ops/deconv_2d.h
+7
-8
mace/ops/deconv_2d_test.cc
mace/ops/deconv_2d_test.cc
+9
-15
mace/ops/shape.h
mace/ops/shape.h
+14
-2
mace/python/tools/converter_tool/base_converter.py
mace/python/tools/converter_tool/base_converter.py
+0
-1
mace/python/tools/converter_tool/caffe_converter.py
mace/python/tools/converter_tool/caffe_converter.py
+0
-4
未找到文件。
mace/kernels/deconv_2d.h
浏览文件 @
e9d1103b
...
...
@@ -90,20 +90,18 @@ void Deconv2dNCHW(const T *input,
}
// namespace deconv
struct
Deconv2dFunctorBase
{
Deconv2dFunctorBase
(
const
int
*
strides
,
Deconv2dFunctorBase
(
const
std
::
vector
<
int
>
&
strides
,
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
output_shape
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
bool
from_caffe
)
const
float
relux_max_limit
)
:
strides_
(
strides
),
padding_type_
(
padding_type
),
paddings_
(
paddings
),
output_shape_
(
output_shape
),
activation_
(
activation
),
relux_max_limit_
(
relux_max_limit
),
from_caffe_
(
from_caffe
)
{}
relux_max_limit_
(
relux_max_limit
)
{}
static
void
CalcDeconvOutputSize
(
const
index_t
*
input_shape
,
// NHWC
...
...
@@ -202,31 +200,28 @@ struct Deconv2dFunctorBase {
padding_size
[
1
]
=
std
::
max
<
int
>
(
0
,
p_w
);
}
const
int
*
strides_
;
// [stride_h, stride_w]
std
::
vector
<
int
>
strides_
;
// [stride_h, stride_w]
const
Padding
padding_type_
;
std
::
vector
<
int
>
paddings_
;
std
::
vector
<
index_t
>
output_shape_
;
const
ActivationType
activation_
;
const
float
relux_max_limit_
;
const
bool
from_caffe_
;
};
template
<
DeviceType
D
,
typename
T
>
struct
Deconv2dFunctor
:
Deconv2dFunctorBase
{
Deconv2dFunctor
(
const
int
*
strides
,
Deconv2dFunctor
(
const
std
::
vector
<
int
>
&
strides
,
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
output_shape
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
bool
from_caffe
)
const
float
relux_max_limit
)
:
Deconv2dFunctorBase
(
strides
,
padding_type
,
paddings
,
output_shape
,
activation
,
relux_max_limit
,
from_caffe
)
{}
relux_max_limit
)
{}
MaceStatus
operator
()(
const
Tensor
*
input
,
// NCHW
const
Tensor
*
filter
,
// OIHW
...
...
@@ -239,13 +234,12 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
output
);
if
(
!
from_caffe_
)
{
// tensorflow
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
int
>
paddings
(
2
);
std
::
vector
<
index_t
>
output_shape
(
4
);
if
(
paddings_
.
empty
())
{
// tensorflow
paddings
=
std
::
vector
<
int
>
(
2
,
0
);
if
(
output_shape_
.
size
()
==
4
)
{
output_shape
[
0
]
=
output_shape_
[
0
];
output_shape
[
1
]
=
output_shape_
[
3
];
output_shape
[
2
]
=
output_shape_
[
1
];
output_shape
[
3
]
=
output_shape_
[
2
];
output_shape
=
output_shape_
;
}
else
{
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
...
...
@@ -255,36 +249,38 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
output_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
}
paddings_
.
clear
();
paddings_
=
std
::
vector
<
int
>
(
2
,
0
);
const
index_t
t
=
output_shape
[
1
];
output_shape
[
1
]
=
output_shape
[
3
];
output_shape
[
3
]
=
output_shape
[
2
];
output_shape
[
2
]
=
t
;
CalcDeconvPaddingAndInputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
,
padding_type_
,
strides_
.
data
()
,
padding_type_
,
output_shape
.
data
(),
paddings_
.
data
(),
true
);
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
output_shape
));
paddings
.
data
(),
true
);
}
else
{
// caffe
output_shape_
.
clear
()
;
output_shape
_
=
std
::
vector
<
index_t
>
(
4
,
0
);
paddings
=
paddings_
;
output_shape
=
std
::
vector
<
index_t
>
(
4
,
0
);
CalcDeconvOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
,
output_shape_
.
data
(),
paddings_
.
data
(),
true
);
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
output_shape_
));
strides_
.
data
(),
output_shape
.
data
(),
paddings
.
data
(),
true
);
}
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
output_shape
));
index_t
kernel_h
=
filter
->
dim
(
2
);
index_t
kernel_w
=
filter
->
dim
(
3
);
const
index_t
*
in_shape
=
input
->
shape
().
data
();
const
index_t
*
out_shape
=
output
->
shape
().
data
();
const
index_t
kernel_hw
[
2
]
=
{
kernel_h
,
kernel_w
};
MACE_CHECK
(
filter
->
dim
(
0
)
==
out_shape
[
1
],
filter
->
dim
(
0
),
" != "
,
out_shape
[
1
]);
MACE_CHECK
(
filter
->
dim
(
0
)
==
out
put
_shape
[
1
],
filter
->
dim
(
0
),
" != "
,
out
put
_shape
[
1
]);
MACE_CHECK
(
filter
->
dim
(
1
)
==
in_shape
[
1
],
filter
->
dim
(
1
),
" != "
,
in_shape
[
1
]);
MACE_CHECK
(
in_shape
[
0
]
==
out_shape
[
0
],
"Input/Output batch size mismatch"
);
MACE_CHECK
(
in_shape
[
0
]
==
output_shape
[
0
],
"Input/Output batch size mismatch"
);
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
filter_mapper
(
filter
);
Tensor
::
MappingGuard
bias_mapper
(
bias
);
...
...
@@ -294,15 +290,15 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
auto
bias_data
=
bias
==
nullptr
?
nullptr
:
bias
->
data
<
T
>
();
auto
output_data
=
output
->
mutable_data
<
T
>
();
int
padding
[
2
];
padding
[
0
]
=
(
paddings
_
[
0
]
+
1
)
>>
1
;
padding
[
1
]
=
(
paddings
_
[
1
]
+
1
)
>>
1
;
padding
[
0
]
=
(
paddings
[
0
]
+
1
)
>>
1
;
padding
[
1
]
=
(
paddings
[
1
]
+
1
)
>>
1
;
deconv
::
Deconv2dNCHW
(
input_data
,
filter_data
,
bias_data
,
in_shape
,
out
_shape
,
out
put_shape
.
data
()
,
kernel_hw
,
strides_
,
strides_
.
data
()
,
padding
,
output_data
);
...
...
@@ -319,20 +315,18 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template
<
typename
T
>
struct
Deconv2dFunctor
<
DeviceType
::
GPU
,
T
>
:
Deconv2dFunctorBase
{
Deconv2dFunctor
(
const
int
*
strides
,
Deconv2dFunctor
(
const
std
::
vector
<
int
>
&
strides
,
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
output_shape
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
bool
from_caffe
)
const
float
relux_max_limit
)
:
Deconv2dFunctorBase
(
strides
,
padding_type
,
paddings
,
output_shape
,
activation
,
relux_max_limit
,
from_caffe
)
{}
relux_max_limit
)
{}
MaceStatus
operator
()(
const
Tensor
*
input
,
const
Tensor
*
filter
,
...
...
mace/kernels/opencl/cl/deconv_2d.cl
浏览文件 @
e9d1103b
...
...
@@ -15,8 +15,10 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
out_channel,
__private
const
int
stride,
__private
const
float
stride_r,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
float
stride_h_r,
__private
const
float
stride_w_r,
__private
const
int
align_h,
__private
const
int
align_w,
__private
const
int
padding_h,
...
...
@@ -53,18 +55,18 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
DATA_TYPE4 out4 = 0;
#endif
const int n_stride = mad(w_id, stride_r, 0);
const int mod_stride = w_id - mul24(n_stride, stride);
const int w = mad24(mul24(n_stride, 5), stride, mod_stride);
const int n_stride = mad(w_id, stride_
w_
r, 0);
const int mod_stride = w_id - mul24(n_stride, stride
_w
);
const int w = mad24(mul24(n_stride, 5), stride
_w
, mod_stride);
const int b = hb / out_height;
const int h = hb - mul24(b, out_height);
if (w < out_width) {
int start_x = floor((float) (w + align_w) * stride_r);
int start_y = (h + align_h) * stride_r;
int start_x = floor((float) (w + align_w) * stride_
w_
r);
int start_y = (h + align_h) * stride_
h_
r;
start_y = max(0, start_y);
int f_start_x = mad24(start_x, stride, padding_w) - w;
int f_start_y = mad24(start_y, stride, padding_h) - h;
int f_start_x = mad24(start_x, stride
_w
, padding_w) - w;
int f_start_y = mad24(start_y, stride
_h
, padding_h) - h;
f_start_x = kernel_w - 1 - f_start_x;
f_start_y = kernel_h - 1 - f_start_y;
...
...
@@ -79,10 +81,10 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
f_pos_x1 = f_pos_x0 + 1;
f_pos_x2 = f_pos_x0 + 2;
f_pos_x3 = f_pos_x0 + 3;
for (int f_y = f_start_y, idx_h = start_y ; f_y >= 0; f_y -= stride, ++idx_h) {
for (int f_y = f_start_y, idx_h = start_y ; f_y >= 0; f_y -= stride
_h
, ++idx_h) {
index_y = mad24(b, in_height, idx_h);
in_pos.y = select(index_y, -1, idx_h < 0 || idx_h >= in_height);
for (int f_x = f_start_x, idx_w = start_x; f_x >= 0; f_x -= stride, ++idx_w) {
for (int f_x = f_start_x, idx_w = start_x; f_x >= 0; f_x -= stride
_w
, ++idx_w) {
f_pos_y = mad24(f_y, kernel_w, f_x);
f_pos_y = mad24(c, kernel_size, f_pos_y);
weight0 = READ_IMAGET(weights, SAMPLER, (int2)(f_pos_x0, f_pos_y));
...
...
@@ -141,24 +143,24 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
out_pos.x
=
mad24
(
c,
out_width,
ow
)
;
WRITE_IMAGET
(
output,
out_pos,
out0
)
;
ow
+=
stride
;
ow
+=
stride
_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride
;
out_pos.x
+=
stride
_w
;
WRITE_IMAGET
(
output,
out_pos,
out1
)
;
ow
+=
stride
;
ow
+=
stride
_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride
;
out_pos.x
+=
stride
_w
;
WRITE_IMAGET
(
output,
out_pos,
out2
)
;
ow
+=
stride
;
ow
+=
stride
_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride
;
out_pos.x
+=
stride
_w
;
WRITE_IMAGET
(
output,
out_pos,
out3
)
;
ow
+=
stride
;
ow
+=
stride
_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride
;
out_pos.x
+=
stride
_w
;
WRITE_IMAGET
(
output,
out_pos,
out4
)
;
}
}
\ No newline at end of file
mace/kernels/opencl/deconv_2d.cc
浏览文件 @
e9d1103b
...
...
@@ -24,7 +24,7 @@ MaceStatus Deconv2dOpencl(cl::Kernel *kernel,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
*
strides
,
const
int
*
paddings
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
...
...
@@ -42,17 +42,20 @@ MaceStatus Deconv2dOpencl(cl::Kernel *kernel,
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
MACE_CHECK
(
stride
>
0
,
"stride should > 0."
);
const
int
stride_h
=
strides
[
0
];
const
int
stride_w
=
strides
[
1
];
MACE_CHECK
(
stride_w
>
0
&&
stride_h
>
0
,
"strides should be > 0."
);
#define MACE_WIDTH_BLK 5
const
index_t
n_strides
=
(
width
+
stride
-
1
)
/
stride
;
const
index_t
n_strides
=
(
width
+
stride
_w
-
1
)
/
stride_w
;
const
index_t
width_blocks
=
((
n_strides
+
MACE_WIDTH_BLK
-
1
)
/
MACE_WIDTH_BLK
)
*
stride
;
const
float
stride_r
=
1.
f
/
static_cast
<
float
>
(
stride
);
((
n_strides
+
MACE_WIDTH_BLK
-
1
)
/
MACE_WIDTH_BLK
)
*
stride_w
;
const
float
stride_h_r
=
1.
f
/
static_cast
<
float
>
(
stride_h
);
const
float
stride_w_r
=
1.
f
/
static_cast
<
float
>
(
stride_w
);
const
int
padding_h
=
(
paddings
[
0
]
+
1
)
>>
1
;
const
int
padding_w
=
(
paddings
[
0
]
+
1
)
>>
1
;
const
int
padding_w
=
(
paddings
[
1
]
+
1
)
>>
1
;
const
int
align_h
=
stride
-
1
-
padding_h
;
const
int
align_w
=
stride
-
1
-
padding_w
;
const
int
align_h
=
stride
_h
-
1
-
padding_h
;
const
int
align_w
=
stride
_w
-
1
-
padding_w
;
const
int
kernel_size
=
filter
->
dim
(
2
)
*
filter
->
dim
(
3
);
auto
runtime
=
OpenCLRuntime
::
Global
();
...
...
@@ -113,8 +116,10 @@ MaceStatus Deconv2dOpencl(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
channels
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
stride
));
kernel
->
setArg
(
idx
++
,
stride_r
);
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
stride_h
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
stride_w
));
kernel
->
setArg
(
idx
++
,
stride_h_r
);
kernel
->
setArg
(
idx
++
,
stride_w_r
);
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
align_h
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
align_w
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
padding_h
));
...
...
@@ -152,34 +157,43 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()(
MACE_CHECK_NOTNULL
(
input
);
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
output
);
if
(
!
from_caffe_
)
{
std
::
vector
<
int
>
paddings
(
2
);
std
::
vector
<
index_t
>
output_shape
(
4
);
if
(
paddings_
.
empty
())
{
paddings
=
std
::
vector
<
int
>
(
2
,
0
);
if
(
output_shape_
.
size
()
!=
4
)
{
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
auto
output_shape_data
=
output_shape_tensor
->
data
<
int32_t
>
();
output_shape
_
=
output_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
}
else
{
output_shape
=
output_shape_
;
}
paddings_
.
clear
();
paddings_
=
std
::
vector
<
int
>
(
2
,
0
);
CalcDeconvPaddingAndInputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
,
padding_type_
,
output_shape_
.
data
(),
paddings_
.
data
());
CalcDeconvPaddingAndInputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
.
data
(),
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
output_shape_
.
clear
();
output_shape_
=
std
::
vector
<
index_t
>
(
4
,
0
);
CalcDeconvOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
,
output_shape_
.
data
(),
paddings_
.
data
());
paddings
=
paddings_
;
output_shape
=
std
::
vector
<
index_t
>
(
4
,
0
);
CalcDeconvOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
.
data
(),
output_shape
.
data
(),
paddings
.
data
());
}
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
_
,
BufferType
::
IN_OUT_CHANNEL
,
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
_
,
output_image_shape
));
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
output_image_shape
));
return
Deconv2dOpencl
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
]
,
paddings
_
.
data
(),
activation_
,
relux_max_limit_
,
return
Deconv2dOpencl
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
.
data
()
,
paddings
.
data
(),
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
future
,
&
kwg_size_
,
&
kernel_error_
);
}
...
...
mace/ops/deconv_2d.h
浏览文件 @
e9d1103b
...
...
@@ -19,23 +19,22 @@
#include "mace/core/operator.h"
#include "mace/kernels/deconv_2d.h"
#include "mace/ops/conv_pool_2d_base.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
Deconv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
class
Deconv2dOp
:
public
Operator
<
D
,
T
>
{
public:
Deconv2dOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
this
->
strides_
.
data
(),
this
->
padding_type_
,
this
->
paddings_
,
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
OperatorBase
::
GetRepeatedArgs
<
int
>
(
"strides"
),
static_cast
<
Padding
>
(
OperatorBase
::
GetOptionalArg
<
int
>
(
"padding"
,
static_cast
<
int
>
(
SAME
))),
OperatorBase
::
GetRepeatedArgs
<
int
>
(
"padding_values"
),
OperatorBase
::
GetRepeatedArgs
<
index_t
>
(
"output_shape"
),
kernels
::
ActivationType
::
NOOP
,
0.0
f
,
OperatorBase
::
GetOptionalArg
<
bool
>
(
"from_caffe"
,
false
))
{}
0.0
f
)
{}
MaceStatus
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
...
...
mace/ops/deconv_2d_test.cc
浏览文件 @
e9d1103b
...
...
@@ -41,7 +41,6 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
filter_shape
,
filter_data
);
net
.
TransformDataFormat
<
D
,
float
>
(
"Filter"
,
HWOI
,
"FilterOIHW"
,
OIHW
);
bool
from_caffe
=
output_shape
.
size
()
!=
4
;
if
(
D
==
DeviceType
::
GPU
)
{
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
...
...
@@ -55,7 +54,6 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
AddIntArg
(
"from_caffe"
,
from_caffe
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
...
...
@@ -74,7 +72,6 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"padding_values"
,
padding_size
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
AddIntArg
(
"from_caffe"
,
from_caffe
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
...
...
@@ -89,7 +86,7 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
template
<
DeviceType
D
>
void
TestNHWCSimple3x3SAME_S1
()
{
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
1
,
Padding
::
SAME
,
{
0
,
0
},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
{},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
3
,
3
,
3
},
{
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
9
,
9
,
...
...
@@ -101,7 +98,7 @@ void TestNHWCSimple3x3SAME_S1() {
{
1
,
3
,
3
,
3
},
{
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
9
,
9
,
9
,
6
,
6
,
6
,
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
});
RunTestSimple
<
D
>
({
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
1
,
Padding
::
SAME
,
{
0
,
0
},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
{},
{
1
,
3
,
3
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
{
1
,
3
,
3
,
3
},
{
54
,
66
,
78
,
126
,
147
,
168
,
130
,
146
,
162
,
...
...
@@ -119,7 +116,7 @@ void TestNHWCSimple3x3SAME_S1() {
template
<
DeviceType
D
>
void
TestNHWCSimple3x3SAME_S2
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
2
,
Padding
::
SAME
,
{
0
,
0
},
{
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
2
,
Padding
::
SAME
,
{},
{
1
,
6
,
6
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
6
,
6
,
3
},
...
...
@@ -137,7 +134,7 @@ void TestNHWCSimple3x3SAME_S2() {
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
1
,
1
,
1
});
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
2
,
Padding
::
SAME
,
{
0
,
0
},
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
2
,
Padding
::
SAME
,
{},
{
1
,
6
,
6
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
...
...
@@ -167,7 +164,7 @@ template <DeviceType D>
void
TestNHWCSimple3x3SAME_S2_1
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
12
,
18
,
12
,
18
,
27
,
18
,
12
,
18
,
12
},
2
,
Padding
::
SAME
,
{
0
,
0
},
{
1
,
5
,
5
,
3
},
{
3
,
3
,
3
,
1
},
{},
{
1
,
5
,
5
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
5
,
5
,
3
},
...
...
@@ -181,7 +178,7 @@ void TestNHWCSimple3x3SAME_S2_1() {
template
<
DeviceType
D
>
void
TestNHWCSimple3x3VALID_S2
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
2
,
Padding
::
VALID
,
{
0
,
0
},
{
1
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
2
,
Padding
::
VALID
,
{},
{
1
,
7
,
7
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
7
,
7
,
3
},
...
...
@@ -197,7 +194,7 @@ void TestNHWCSimple3x3VALID_S2() {
template
<
DeviceType
D
>
void
TestNHWCSimple3x3VALID_S1
()
{
RunTestSimple
<
D
>
(
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
1
,
Padding
::
VALID
,
{
0
,
0
},
{
1
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
},
1
,
Padding
::
VALID
,
{},
{
1
,
5
,
5
,
3
},
{
3
,
3
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
},
...
...
@@ -212,7 +209,7 @@ void TestNHWCSimple3x3VALID_S1() {
template
<
DeviceType
D
>
void
TestNHWCSimple2x2SAME
()
{
RunTestSimple
<
D
>
({
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
1
,
Padding
::
SAME
,
{
0
,
0
},
RunTestSimple
<
D
>
({
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
1
,
Padding
::
SAME
,
{},
{
1
,
2
,
2
,
1
},
{
3
,
3
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
{
1
,
2
,
2
,
1
},
{
4.
f
,
4.
f
,
4.
f
,
4.
f
});
...
...
@@ -221,7 +218,7 @@ void TestNHWCSimple2x2SAME() {
template
<
DeviceType
D
>
void
TestNHWCSimple2x2VALID
()
{
RunTestSimple
<
D
>
(
{
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
2
,
Padding
::
VALID
,
{
0
,
0
},
{
1
,
5
,
5
,
1
},
{
1
,
2
,
2
,
1
},
{
1
,
1
,
1
,
1
},
2
,
Padding
::
VALID
,
{},
{
1
,
5
,
5
,
1
},
{
3
,
3
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
{
1
,
5
,
5
,
1
},
{
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
,
1.
f
,
1.
f
,
2.
f
,
1.
f
,
1.
f
,
2.
f
,
2.
f
,
4.
f
,
...
...
@@ -333,7 +330,6 @@ void TestComplexDeconvNxNS12(const int batch,
paddings
.
push_back
(
padding
);
paddings
.
push_back
(
padding
);
}
bool
from_caffe
=
output_shape
.
size
()
!=
4
;
// Construct graph
OpDefBuilder
(
"Deconv2D"
,
"Deconv2dTest"
)
.
Input
(
"InputNCHW"
)
...
...
@@ -344,7 +340,6 @@ void TestComplexDeconvNxNS12(const int batch,
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
AddIntArg
(
"from_caffe"
,
from_caffe
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
...
...
@@ -375,7 +370,6 @@ void TestComplexDeconvNxNS12(const int batch,
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
AddIntsArg
(
"output_shape"
,
output_shape
)
.
AddIntArg
(
"from_caffe"
,
from_caffe
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on device
...
...
mace/ops/shape.h
浏览文件 @
e9d1103b
...
...
@@ -39,8 +39,20 @@ class ShapeOp : public Operator<D, T> {
Tensor
::
MappingGuard
output_guard
(
output
);
int32_t
*
output_data
=
output
->
mutable_data
<
int32_t
>
();
for
(
index_t
i
=
0
;
i
<
input
->
dim_size
();
++
i
)
{
output_data
[
i
]
=
input
->
dim
(
i
);
const
int
data_format
=
OperatorBase
::
GetOptionalArg
<
int
>
(
"data_format"
,
0
);
if
(
input
->
dim_size
()
==
4
&&
D
==
DeviceType
::
CPU
&&
data_format
==
DataFormat
::
NCHW
)
{
// transpose NCHW to NHWC for cpu runtime
output_data
[
0
]
=
static_cast
<
int32_t
>
(
input
->
dim
(
0
));
output_data
[
1
]
=
static_cast
<
int32_t
>
(
input
->
dim
(
2
));
output_data
[
2
]
=
static_cast
<
int32_t
>
(
input
->
dim
(
3
));
output_data
[
3
]
=
static_cast
<
int32_t
>
(
input
->
dim
(
1
));
}
else
{
for
(
unsigned
int
i
=
0
;
i
<
input
->
dim_size
();
++
i
)
{
output_data
[
i
]
=
static_cast
<
int32_t
>
(
input
->
dim
(
i
));
}
}
SetFutureDefaultWaitFn
(
future
);
...
...
mace/python/tools/converter_tool/base_converter.py
浏览文件 @
e9d1103b
...
...
@@ -167,7 +167,6 @@ class MaceKeyword(object):
mace_transpose_b_str
=
'transpose_b'
mace_op_data_type_str
=
'T'
mace_offset_str
=
'offset'
mace_from_caffe_str
=
'from_caffe'
mace_opencl_max_image_size
=
"opencl_max_image_size"
mace_seperate_buffer_str
=
'seperate_buffer'
mace_scalar_input_index_str
=
'scalar_input_index'
...
...
mace/python/tools/converter_tool/caffe_converter.py
浏览文件 @
e9d1103b
...
...
@@ -414,10 +414,6 @@ class CaffeConverter(base_converter.ConverterInterface):
op
.
type
=
MaceOp
.
Deconv2D
.
name
from_caffe_arg
=
op
.
arg
.
add
()
from_caffe_arg
.
name
=
MaceKeyword
.
mace_from_caffe_str
from_caffe_arg
.
i
=
1
self
.
add_stride_pad_kernel_arg
(
param
,
op
)
# dilation is specific for convolution in caffe
dilations
=
[
1
,
1
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录