Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
48e6ee96
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
48e6ee96
编写于
11月 09, 2018
作者:
L
liutuo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add 2x2s2_deconv for neon and optimize deconv and dw_deconv ops' structure
上级
fc7f4967
变更
7
隐藏空白更改
内联
并排
Showing 7 changed files with 618 additions and 330 deletions
+618
-330
.gitignore
.gitignore
+6
-0
mace/ops/arm/deconv_2d_neon.h
mace/ops/arm/deconv_2d_neon.h
+12
-0
mace/ops/arm/deconv_2d_neon_2x2.cc
mace/ops/arm/deconv_2d_neon_2x2.cc
+262
-0
mace/ops/deconv_2d.cc
mace/ops/deconv_2d.cc
+123
-211
mace/ops/deconv_2d.h
mace/ops/deconv_2d.h
+168
-0
mace/ops/deconv_2d_test.cc
mace/ops/deconv_2d_test.cc
+22
-20
mace/ops/depthwise_deconv2d.cc
mace/ops/depthwise_deconv2d.cc
+25
-99
未找到文件。
.gitignore
浏览文件 @
48e6ee96
...
@@ -22,3 +22,9 @@ mace/examples/android/macelibrary/src/main/cpp/mace/
...
@@ -22,3 +22,9 @@ mace/examples/android/macelibrary/src/main/cpp/mace/
*swp
*swp
*~
*~
.python-version
.python-version
mace/examples/android/macelibrary/src/main/cpp/include/mace/public/
mace/examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/
mace/examples/android/macelibrary/src/main/jniLibs/arm64-v8a/
mace/ops/arm/deconv_2d_neon.h
浏览文件 @
48e6ee96
...
@@ -21,6 +21,18 @@
...
@@ -21,6 +21,18 @@
namespace
mace
{
namespace
mace
{
namespace
ops
{
namespace
ops
{
void
Deconv2dNeonK2x2S1
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
Deconv2dNeonK2x2S2
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
Deconv2dNeonK3x3S1
(
const
float
*
input
,
void
Deconv2dNeonK3x3S1
(
const
float
*
input
,
const
float
*
filter
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
in_shape
,
...
...
mace/ops/arm/deconv_2d_neon_2x2.cc
0 → 100644
浏览文件 @
48e6ee96
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/macros.h"
#include "mace/ops/arm/deconv_2d_neon.h"
namespace
mace
{
namespace
ops
{
// 2x2-kernel, stride-1 deconvolution (transposed convolution) for NCHW
// float tensors, with an optional NEON fast path.
//
// Layout (from the indexing below):
//   input    : [batch, inch, h, w]            (in_shape = N,C,H,W)
//   filter   : 4 taps per (oc, ic) pair, laid out oc-major then ic
//              (the `* 4` stride), i.e. an OIHW 2x2 kernel flattened.
//   output   : [batch, outch, outh, outw]     (out_shape = N,C,H,W)
//
// The kernel ACCUMULATES into `output` (all stores are read-modify-write
// via `+=` / vld+vfma+vst), so the caller is expected to have zeroed the
// output buffer beforehand — TODO confirm against the calling op.
//
// Each input pixel is scattered to a 2x2 window of the output; output
// channels are processed two at a time to reuse the loaded input vector.
// neon_vfma_lane_{0..3} are fused-multiply-accumulate-by-lane helpers
// declared elsewhere (presumably deconv_2d_neon.h) — each multiplies
// in_vec by one lane of the 4-tap filter vector.
void Deconv2dNeonK2x2S1(const float *input,
                        const float *filter,
                        const index_t *in_shape,
                        const index_t *out_shape,
                        float *output) {
  const index_t inch = in_shape[1];
  const index_t h = in_shape[2];
  const index_t w = in_shape[3];
  const index_t outch = out_shape[1];
  const index_t outh = out_shape[2];
  const index_t outw = out_shape[3];
  const index_t out_img_size = outh * outw;

  // Parallelize over (batch, output-channel-pair); iterations are
  // independent because each writes a disjoint slice of `output`.
#pragma omp parallel for collapse(2)
  for (index_t b = 0; b < out_shape[0]; ++b) {
    for (index_t oc = 0; oc < outch; oc += 2) {
      if (oc + 1 < outch) {
        // Two output channels available: process both per input load.
        float *out_base0 = output + (b * outch + oc) * out_img_size;
        float *out_base1 = out_base0 + out_img_size;
        for (index_t ic = 0; ic < inch; ++ic) {
          const float *input_base = input + (b * inch + ic) * h * w;
          // 4 filter taps (2x2) per (oc, ic); channel oc+1's taps sit
          // one full input-channel stride (inch * 4) further on.
          const float *kernel_base0 = filter + (oc * inch + ic) * 4;
          const float *kernel_base1 = kernel_base0 + inch * 4;
          const float *in = input_base;
          // output channel 0
          const float *k0 = kernel_base0;
          // output channel 1
          const float *k1 = kernel_base1;
#if defined(MACE_ENABLE_NEON)
          // load filter
          float32x4_t k0_vec = vld1q_f32(k0);
          float32x4_t k1_vec = vld1q_f32(k1);
#endif
          for (index_t i = 0; i < h; ++i) {
            // Input row i contributes to output rows i and i+1
            // (out_rowX_0 / out_rowX_1); stride-1 deconv also shifts
            // by one column for the right-hand taps (`+ 1` below).
            float *out_row_base0 = out_base0 + i * outw;
            float *out_row0_0 = out_row_base0;
            float *out_row0_1 = out_row_base0 + outw;

            float *out_row_base1 = out_base1 + i * outw;
            float *out_row1_0 = out_row_base1;
            float *out_row1_1 = out_row_base1 + outw;

            index_t j = 0;
#if defined(MACE_ENABLE_NEON)
            // Vectorized body: 4 input pixels per iteration; overlapping
            // stores (offset 0 and offset 1) implement the 2-wide kernel.
            for (; j + 3 < w; j += 4) {
              float32x4_t in_vec = vld1q_f32(in);

              float32x4_t out00, out01, out02, out03;
              float32x4_t out10, out11, out12, out13;

              // channel oc: taps k0[0..3] -> lanes 0..3
              out00 = vld1q_f32(out_row0_0);
              out00 = neon_vfma_lane_0(out00, in_vec, k0_vec);
              vst1q_f32(out_row0_0, out00);

              out01 = vld1q_f32(out_row0_0 + 1);
              out01 = neon_vfma_lane_1(out01, in_vec, k0_vec);
              vst1q_f32(out_row0_0 + 1, out01);

              out02 = vld1q_f32(out_row0_1);
              out02 = neon_vfma_lane_2(out02, in_vec, k0_vec);
              vst1q_f32(out_row0_1, out02);

              out03 = vld1q_f32(out_row0_1 + 1);
              out03 = neon_vfma_lane_3(out03, in_vec, k0_vec);
              vst1q_f32(out_row0_1 + 1, out03);

              // channel oc+1: same pattern with k1's lanes
              out10 = vld1q_f32(out_row1_0);
              out10 = neon_vfma_lane_0(out10, in_vec, k1_vec);
              vst1q_f32(out_row1_0, out10);

              out11 = vld1q_f32(out_row1_0 + 1);
              out11 = neon_vfma_lane_1(out11, in_vec, k1_vec);
              vst1q_f32(out_row1_0 + 1, out11);

              out12 = vld1q_f32(out_row1_1);
              out12 = neon_vfma_lane_2(out12, in_vec, k1_vec);
              vst1q_f32(out_row1_1, out12);

              out13 = vld1q_f32(out_row1_1 + 1);
              out13 = neon_vfma_lane_3(out13, in_vec, k1_vec);
              vst1q_f32(out_row1_1 + 1, out13);

              in += 4;
              out_row0_0 += 4;
              out_row0_1 += 4;
              out_row1_0 += 4;
              out_row1_1 += 4;
            }
#endif
            // Scalar tail (also the whole body when NEON is disabled):
            // scatter one input value into the 2x2 output window of
            // both channels. k0/k1 layout: [r0c0, r0c1, r1c0, r1c1].
            for (; j < w; ++j) {
              float val = in[0];
              for (int k = 0; k < 2; ++k) {
                out_row0_0[k] += val * k0[k];
                out_row0_1[k] += val * k0[k + 2];
                out_row1_0[k] += val * k1[k];
                out_row1_1[k] += val * k1[k + 2];
              }
              in++;
              out_row0_0++;
              out_row0_1++;
              out_row1_0++;
              out_row1_1++;
            }
          }
        }
      } else {
        // Odd trailing output channel: single-channel variant of the
        // same scatter, identical addressing minus the oc+1 half.
        float *out_base0 = output + (b * outch + oc) * outh * outw;
        for (index_t ic = 0; ic < inch; ++ic) {
          const float *input_base = input + (b * inch + ic) * h * w;
          const float *kernel_base0 = filter + (oc * inch + ic) * 4;
          const float *in = input_base;
          const float *k0 = kernel_base0;
#if defined(MACE_ENABLE_NEON)
          // load filter
          float32x4_t k0_vec = vld1q_f32(k0);
#endif
          for (index_t i = 0; i < h; ++i) {
            float *out_row_base0 = out_base0 + i * outw;
            float *out_row0_0 = out_row_base0;
            float *out_row0_1 = out_row_base0 + outw;
            index_t j = 0;
#if defined(MACE_ENABLE_NEON)
            for (; j + 3 < w; j += 4) {
              float32x4_t in_vec = vld1q_f32(in);
              float32x4_t out00, out01, out02, out03;

              out00 = vld1q_f32(out_row0_0);
              out00 = neon_vfma_lane_0(out00, in_vec, k0_vec);
              vst1q_f32(out_row0_0, out00);

              out01 = vld1q_f32(out_row0_0 + 1);
              out01 = neon_vfma_lane_1(out01, in_vec, k0_vec);
              vst1q_f32(out_row0_0 + 1, out01);

              out02 = vld1q_f32(out_row0_1);
              out02 = neon_vfma_lane_2(out02, in_vec, k0_vec);
              vst1q_f32(out_row0_1, out02);

              out03 = vld1q_f32(out_row0_1 + 1);
              out03 = neon_vfma_lane_3(out03, in_vec, k0_vec);
              vst1q_f32(out_row0_1 + 1, out03);

              in += 4;
              out_row0_0 += 4;
              out_row0_1 += 4;
            }
#endif
            for (; j < w; ++j) {
              float val = in[0];
              for (int k = 0; k < 2; ++k) {
                out_row0_0[k] += val * k0[k];
                out_row0_1[k] += val * k0[k + 2];
              }
              in++;
              out_row0_0++;
              out_row0_1++;
            }
          }
        }
      }
    }
  }
}
// 2x2-kernel, stride-2 deconvolution (transposed convolution) for NCHW
// float tensors, with an optional NEON fast path.
//
// Same layouts as Deconv2dNeonK2x2S1: input [N, inch, h, w], 4 filter
// taps per (oc, ic) pair, output [N, outch, outh, outw]. With stride 2
// the 2x2 windows written by neighboring input pixels do NOT overlap:
// input row i maps to output rows 2*i and 2*i+1, and input column j to
// output columns 2*j and 2*j+1.
//
// Accumulates into `output` (`+=` / vld+vfma+vst throughout); caller is
// expected to have zeroed the buffer first — TODO confirm at call site.
void Deconv2dNeonK2x2S2(const float *input,
                        const float *filter,
                        const index_t *in_shape,
                        const index_t *out_shape,
                        float *output) {
  const index_t inch = in_shape[1];
  const index_t h = in_shape[2];
  const index_t w = in_shape[3];
  const index_t outch = out_shape[1];
  const index_t outh = out_shape[2];
  const index_t outw = out_shape[3];
  const index_t out_img_size = outh * outw;

  // Each (b, oc) iteration owns a disjoint output image.
#pragma omp parallel for collapse(2)
  for (index_t b = 0; b < out_shape[0]; ++b) {
    for (index_t oc = 0; oc < outch; ++oc) {
      float *out_base = output + (b * outch + oc) * out_img_size;
      for (index_t ic = 0; ic < inch; ++ic) {
        const float *input_base = input + (b * inch + ic) * h * w;
        const float *kernel_base = filter + (oc * inch + ic) * 4;
        const float *in = input_base;
        const float *k0 = kernel_base;
#if defined(MACE_ENABLE_NEON)
        float32x4_t k0_vec = vld1q_f32(k0);
#endif
        for (index_t i = 0; i < h; ++i) {
          // Stride-2: input row i starts at output row 2*i.
          float *out_row_base = out_base + i * 2 * outw;
          float *out_row_0 = out_row_base;
          float *out_row_1 = out_row_0 + outw;
          index_t j = 0;
#if defined(MACE_ENABLE_NEON)
          // 4 input pixels -> 8 output columns per row. vld2q/vst2q
          // de-interleave the output into even columns (val[0], tap
          // lane 0/2) and odd columns (val[1], tap lane 1/3).
          for (; j + 3 < w; j += 4) {
            float32x4_t in_vec = vld1q_f32(in);
            // out row 0
            float32x4x2_t out00 = vld2q_f32(out_row_0);
            out00.val[0] = neon_vfma_lane_0(out00.val[0], in_vec, k0_vec);
            out00.val[1] = neon_vfma_lane_1(out00.val[1], in_vec, k0_vec);
            vst2q_f32(out_row_0, out00);
            // out row 1
            float32x4x2_t out10 = vld2q_f32(out_row_1);
            out10.val[0] = neon_vfma_lane_2(out10.val[0], in_vec, k0_vec);
            out10.val[1] = neon_vfma_lane_3(out10.val[1], in_vec, k0_vec);
            vst2q_f32(out_row_1, out10);

            in += 4;
            out_row_0 += 8;  // 4 input pixels * stride 2
            out_row_1 += 8;
          }
#endif
          // Scalar tail: one input value fills the non-overlapping 2x2
          // output window; k0 layout [r0c0, r0c1, r1c0, r1c1].
          for (; j < w; ++j) {
            float val = in[0];
            for (int k = 0; k < 2; ++k) {
              out_row_0[k] += val * k0[k];
              out_row_1[k] += val * k0[k + 2];
            }
            in++;
            out_row_0 += 2;  // advance by the stride
            out_row_1 += 2;
          }
        }
      }
    }
  }
}
}
// namespace ops
}
// namespace mace
mace/ops/deconv_2d.cc
浏览文件 @
48e6ee96
...
@@ -25,11 +25,9 @@
...
@@ -25,11 +25,9 @@
#include <vector>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/operator.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
#include "mace/ops/activation.h"
#include "mace/ops/activation.h"
#include "mace/ops/arm/deconv_2d_neon.h"
#include "mace/ops/arm/deconv_2d_neon.h"
#include "mace/ops/conv_pool_2d_util.h"
#include "mace/utils/utils.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/deconv_2d.h"
#include "mace/ops/opencl/image/deconv_2d.h"
...
@@ -38,134 +36,6 @@
...
@@ -38,134 +36,6 @@
namespace
mace
{
namespace
mace
{
namespace
ops
{
namespace
ops
{
class
Deconv2dOpBase
:
public
Operation
{
public:
explicit
Deconv2dOpBase
(
OpConstructContext
*
context
)
:
Operation
(
context
),
strides_
(
Operation
::
GetRepeatedArgs
<
int
>
(
"strides"
)),
padding_type_
(
static_cast
<
Padding
>
(
Operation
::
GetOptionalArg
<
int
>
(
"padding"
,
static_cast
<
int
>
(
SAME
)))),
paddings_
(
Operation
::
GetRepeatedArgs
<
int
>
(
"padding_values"
)),
model_type_
(
static_cast
<
ops
::
FrameworkType
>
(
Operation
::
GetOptionalArg
<
int
>
(
"framework_type"
,
0
))),
activation_
(
ops
::
StringToActivationType
(
Operation
::
GetOptionalArg
<
std
::
string
>
(
"activation"
,
"NOOP"
))),
relux_max_limit_
(
Operation
::
GetOptionalArg
<
float
>
(
"max_limit"
,
0.0
f
))
{}
static
void
CalcDeconvOutputSize
(
const
index_t
*
input_shape
,
// NHWC
const
index_t
*
filter_shape
,
// OIHW
const
int
*
strides
,
index_t
*
output_shape
,
const
int
*
padding_size
,
int
*
input_padding
,
const
bool
isNCHW
=
false
)
{
MACE_CHECK_NOTNULL
(
output_shape
);
MACE_CHECK_NOTNULL
(
padding_size
);
MACE_CHECK_NOTNULL
(
input_shape
);
MACE_CHECK_NOTNULL
(
filter_shape
);
MACE_CHECK_NOTNULL
(
strides
);
const
index_t
output_channel
=
filter_shape
[
0
];
const
index_t
in_height
=
isNCHW
?
input_shape
[
2
]
:
input_shape
[
1
];
const
index_t
in_width
=
isNCHW
?
input_shape
[
3
]
:
input_shape
[
2
];
const
index_t
kernel_h
=
filter_shape
[
2
];
const
index_t
kernel_w
=
filter_shape
[
3
];
input_padding
[
0
]
=
static_cast
<
int
>
((
kernel_h
-
1
)
*
2
-
padding_size
[
0
]);
input_padding
[
1
]
=
static_cast
<
int
>
((
kernel_w
-
1
)
*
2
-
padding_size
[
1
]);
input_padding
[
0
]
=
std
::
max
<
int
>
(
0
,
input_padding
[
0
]);
input_padding
[
1
]
=
std
::
max
<
int
>
(
0
,
input_padding
[
1
]);
index_t
out_height
=
(
in_height
-
1
)
*
strides
[
0
]
+
kernel_h
-
padding_size
[
0
];
index_t
out_width
=
(
in_width
-
1
)
*
strides
[
1
]
+
kernel_w
-
padding_size
[
1
];
output_shape
[
0
]
=
input_shape
[
0
];
if
(
isNCHW
)
{
output_shape
[
1
]
=
output_channel
;
output_shape
[
2
]
=
out_height
;
output_shape
[
3
]
=
out_width
;
}
else
{
output_shape
[
1
]
=
out_height
;
output_shape
[
2
]
=
out_width
;
output_shape
[
3
]
=
output_channel
;
}
}
static
void
CalcDeconvPaddingAndInputSize
(
const
index_t
*
input_shape
,
// NHWC
const
index_t
*
filter_shape
,
// OIHW
const
int
*
strides
,
Padding
padding
,
const
index_t
*
output_shape
,
int
*
padding_size
,
const
bool
isNCHW
=
false
)
{
MACE_CHECK_NOTNULL
(
output_shape
);
MACE_CHECK_NOTNULL
(
padding_size
);
MACE_CHECK_NOTNULL
(
input_shape
);
MACE_CHECK_NOTNULL
(
filter_shape
);
MACE_CHECK_NOTNULL
(
strides
);
const
index_t
in_height
=
isNCHW
?
input_shape
[
2
]
:
input_shape
[
1
];
const
index_t
in_width
=
isNCHW
?
input_shape
[
3
]
:
input_shape
[
2
];
const
index_t
out_height
=
isNCHW
?
output_shape
[
2
]
:
output_shape
[
1
];
const
index_t
out_width
=
isNCHW
?
output_shape
[
3
]
:
output_shape
[
2
];
const
index_t
extended_input_height
=
(
in_height
-
1
)
*
strides
[
0
]
+
1
;
const
index_t
extended_input_width
=
(
in_width
-
1
)
*
strides
[
1
]
+
1
;
const
index_t
filter_h
=
filter_shape
[
2
];
const
index_t
filter_w
=
filter_shape
[
3
];
index_t
expected_input_height
=
0
,
expected_input_width
=
0
;
switch
(
padding
)
{
case
VALID
:
expected_input_height
=
(
out_height
-
filter_h
+
strides
[
0
])
/
strides
[
0
];
expected_input_width
=
(
out_width
-
filter_w
+
strides
[
1
])
/
strides
[
1
];
break
;
case
SAME
:
expected_input_height
=
(
out_height
+
strides
[
0
]
-
1
)
/
strides
[
0
];
expected_input_width
=
(
out_width
+
strides
[
1
]
-
1
)
/
strides
[
1
];
break
;
default:
MACE_CHECK
(
false
,
"Unsupported padding type: "
,
padding
);
}
MACE_CHECK
(
expected_input_height
==
in_height
,
expected_input_height
,
"!="
,
in_height
);
MACE_CHECK
(
expected_input_width
==
in_width
,
expected_input_width
,
"!="
,
in_width
);
const
int
p_h
=
static_cast
<
int
>
(
out_height
+
filter_h
-
1
-
extended_input_height
);
const
int
p_w
=
static_cast
<
int
>
(
out_width
+
filter_w
-
1
-
extended_input_width
);
padding_size
[
0
]
=
std
::
max
<
int
>
(
0
,
p_h
);
padding_size
[
1
]
=
std
::
max
<
int
>
(
0
,
p_w
);
}
protected:
std
::
vector
<
int
>
strides_
;
// [stride_h, stride_w]
const
Padding
padding_type_
;
std
::
vector
<
int
>
paddings_
;
const
FrameworkType
model_type_
;
const
ActivationType
activation_
;
const
float
relux_max_limit_
;
};
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
Deconv2dOp
;
class
Deconv2dOp
;
...
@@ -193,56 +63,65 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
...
@@ -193,56 +63,65 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
output
);
MACE_CHECK_NOTNULL
(
output
);
std
::
vector
<
int
>
paddings
(
2
);
std
::
vector
<
int
>
in_paddings
(
2
,
0
);
std
::
vector
<
int
>
out_paddings
(
2
);
std
::
vector
<
int
>
out_paddings
(
2
,
0
);
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
out_shape
(
4
,
0
);
std
::
vector
<
index_t
>
padded_out_shape
(
4
,
0
);
if
(
model_type_
==
FrameworkType
::
TENSORFLOW
)
{
// tensorflow
if
(
model_type_
==
FrameworkType
::
TENSORFLOW
)
{
// tensorflow
paddings
=
std
::
vector
<
int
>
(
2
,
0
);
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
auto
output_shape_data
=
auto
output_shape_data
=
output_shape_tensor
->
data
<
int32_t
>
();
output_shape_tensor
->
data
<
int32_t
>
();
out
put
_shape
=
out_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
const
index_t
t
=
out
put
_shape
[
1
];
const
index_t
t
=
out_shape
[
1
];
out
put_shape
[
1
]
=
outp
ut_shape
[
3
];
out
_shape
[
1
]
=
o
ut_shape
[
3
];
out
put_shape
[
3
]
=
outp
ut_shape
[
2
];
out
_shape
[
3
]
=
o
ut_shape
[
2
];
out
put
_shape
[
2
]
=
t
;
out_shape
[
2
]
=
t
;
CalcDeconv
PaddingAndInputSize
(
CalcDeconv
Shape_TF
(
input
->
shape
().
data
(),
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
.
data
(),
padding_type_
,
out_shape
.
data
(),
output_shape
.
data
(),
strides_
.
data
(),
paddings
.
data
(),
true
);
1
,
padding_type_
,
in_paddings
.
data
(),
out_paddings
.
data
(),
padded_out_shape
.
data
(),
true
);
}
else
{
// caffe
}
else
{
// caffe
out_paddings
=
paddings_
;
if
(
!
paddings_
.
empty
())
out_paddings
=
paddings_
;
output_shape
=
std
::
vector
<
index_t
>
(
4
,
0
);
CalcDeconvShape_Caffe
(
CalcDeconvOutputSize
(
input
->
shape
().
data
(),
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
.
data
(),
strides_
.
data
(),
output_shape
.
data
(),
out_paddings
.
data
(),
out_paddings
.
data
(),
1
,
paddings
.
data
(),
in_paddings
.
data
(),
true
);
out_shape
.
data
(),
padded_out_shape
.
data
(),
true
);
}
}
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
output_shape
));
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
out_shape
));
output
->
Clear
();
index_t
kernel_h
=
filter
->
dim
(
2
);
index_t
kernel_h
=
filter
->
dim
(
2
);
index_t
kernel_w
=
filter
->
dim
(
3
);
index_t
kernel_w
=
filter
->
dim
(
3
);
const
index_t
*
in_shape
=
input
->
shape
().
data
();
const
index_t
*
in_shape
=
input
->
shape
().
data
();
MACE_CHECK
(
filter
->
dim
(
0
)
==
out
put
_shape
[
1
],
filter
->
dim
(
0
),
" != "
,
MACE_CHECK
(
filter
->
dim
(
0
)
==
out_shape
[
1
],
filter
->
dim
(
0
),
" != "
,
out
put
_shape
[
1
]);
out_shape
[
1
]);
MACE_CHECK
(
filter
->
dim
(
1
)
==
in_shape
[
1
],
filter
->
dim
(
1
),
" != "
,
MACE_CHECK
(
filter
->
dim
(
1
)
==
in_shape
[
1
],
filter
->
dim
(
1
),
" != "
,
in_shape
[
1
]);
in_shape
[
1
]);
MACE_CHECK
(
in_shape
[
0
]
==
out
put
_shape
[
0
],
MACE_CHECK
(
in_shape
[
0
]
==
out_shape
[
0
],
"Input/Output batch size mismatch"
);
"Input/Output batch size mismatch"
);
std
::
function
<
void
(
const
float
*
input
,
std
::
function
<
void
(
const
float
*
input
,
const
float
*
filter
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
const
index_t
*
out
put
_shape
,
float
*
output
)
>
deconv_func
;
float
*
output
)
>
deconv_func
;
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
input_mapper
(
input
);
...
@@ -254,13 +133,9 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
...
@@ -254,13 +133,9 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
auto
bias_data
=
bias
==
nullptr
?
nullptr
:
bias
->
data
<
float
>
();
auto
bias_data
=
bias
==
nullptr
?
nullptr
:
bias
->
data
<
float
>
();
auto
output_data
=
output
->
mutable_data
<
float
>
();
auto
output_data
=
output
->
mutable_data
<
float
>
();
const
index_t
padded_out_h
=
(
in_shape
[
2
]
-
1
)
*
strides_
[
0
]
+
kernel_h
;
const
index_t
pad_h
=
out_paddings
[
0
]
/
2
;
const
index_t
padded_out_w
=
(
in_shape
[
3
]
-
1
)
*
strides_
[
1
]
+
kernel_w
;
const
index_t
pad_w
=
out_paddings
[
1
]
/
2
;
const
index_t
pad_h
=
(
padded_out_h
-
output_shape
[
2
])
/
2
;
const
index_t
pad_w
=
(
padded_out_w
-
output_shape
[
3
])
/
2
;
std
::
vector
<
index_t
>
padded_out_shape
({
output_shape
[
0
],
output_shape
[
1
],
padded_out_h
,
padded_out_w
});
index_t
padded_out_size
=
index_t
padded_out_size
=
std
::
accumulate
(
padded_out_shape
.
begin
(),
std
::
accumulate
(
padded_out_shape
.
begin
(),
padded_out_shape
.
end
(),
padded_out_shape
.
end
(),
...
@@ -274,6 +149,11 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
...
@@ -274,6 +149,11 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
padded_out
.
Clear
();
padded_out
.
Clear
();
auto
*
padded_out_data
=
padded_out
.
mutable_data
<
float
>
();
auto
*
padded_out_data
=
padded_out
.
mutable_data
<
float
>
();
bool
use_neon_2x2_s1
=
kernel_h
==
kernel_w
&&
kernel_h
==
2
&&
strides_
[
0
]
==
strides_
[
1
]
&&
strides_
[
0
]
==
1
;
bool
use_neon_2x2_s2
=
kernel_h
==
kernel_w
&&
kernel_h
==
2
&&
strides_
[
0
]
==
strides_
[
1
]
&&
strides_
[
0
]
==
2
;
bool
use_neon_3x3_s1
=
kernel_h
==
kernel_w
&&
kernel_h
==
3
&&
bool
use_neon_3x3_s1
=
kernel_h
==
kernel_w
&&
kernel_h
==
3
&&
strides_
[
0
]
==
strides_
[
1
]
&&
strides_
[
0
]
==
1
;
strides_
[
0
]
==
strides_
[
1
]
&&
strides_
[
0
]
==
1
;
bool
use_neon_3x3_s2
=
kernel_h
==
kernel_w
&&
kernel_h
==
3
&&
bool
use_neon_3x3_s2
=
kernel_h
==
kernel_w
&&
kernel_h
==
3
&&
...
@@ -284,73 +164,98 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
...
@@ -284,73 +164,98 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
bool
use_neon_4x4_s2
=
kernel_h
==
kernel_w
&&
kernel_h
==
4
&&
bool
use_neon_4x4_s2
=
kernel_h
==
kernel_w
&&
kernel_h
==
4
&&
strides_
[
0
]
==
strides_
[
1
]
&&
strides_
[
0
]
==
2
;
strides_
[
0
]
==
strides_
[
1
]
&&
strides_
[
0
]
==
2
;
if
(
use_neon_
3x3
_s1
)
{
if
(
use_neon_
2x2
_s1
)
{
deconv_func
=
[
=
](
const
float
*
input
,
deconv_func
=
[
=
](
const
float
*
input
,
const
float
*
filter
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
input_shape
,
const
index_t
*
padded_out_shape
,
const
index_t
*
padded_output_shape
,
float
*
padded_output
)
{
Deconv2dNeonK2x2S1
(
input
,
filter
,
input_shape
,
padded_output_shape
,
padded_output
);
};
}
else
if
(
use_neon_2x2_s2
)
{
deconv_func
=
[
=
](
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
input_shape
,
const
index_t
*
padded_output_shape
,
float
*
padded_output
)
{
Deconv2dNeonK2x2S2
(
input
,
filter
,
input_shape
,
padded_output_shape
,
padded_output
);
};
}
else
if
(
use_neon_3x3_s1
)
{
deconv_func
=
[
=
](
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
input_shape
,
const
index_t
*
padded_output_shape
,
float
*
padded_output
)
{
float
*
padded_output
)
{
Deconv2dNeonK3x3S1
(
input
,
Deconv2dNeonK3x3S1
(
input
,
filter
,
filter
,
in_shape
,
in
put
_shape
,
padded_out_shape
,
padded_out
put
_shape
,
padded_output
);
padded_output
);
};
};
}
else
if
(
use_neon_3x3_s2
)
{
}
else
if
(
use_neon_3x3_s2
)
{
deconv_func
=
[
=
](
const
float
*
input
,
deconv_func
=
[
=
](
const
float
*
input
,
const
float
*
filter
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
in
put
_shape
,
const
index_t
*
padded_out_shape
,
const
index_t
*
padded_out
put
_shape
,
float
*
padded_output
)
{
float
*
padded_output
)
{
Deconv2dNeonK3x3S2
(
input
,
Deconv2dNeonK3x3S2
(
input
,
filter
,
filter
,
in_shape
,
in
put
_shape
,
padded_out_shape
,
padded_out
put
_shape
,
padded_output
);
padded_output
);
};
};
}
else
if
(
use_neon_4x4_s1
)
{
}
else
if
(
use_neon_4x4_s1
)
{
deconv_func
=
[
=
](
const
float
*
input
,
deconv_func
=
[
=
](
const
float
*
input
,
const
float
*
filter
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
in
put
_shape
,
const
index_t
*
padded_out_shape
,
const
index_t
*
padded_out
put
_shape
,
float
*
padded_output
)
{
float
*
padded_output
)
{
Deconv2dNeonK4x4S1
(
input
,
Deconv2dNeonK4x4S1
(
input
,
filter
,
filter
,
in_shape
,
in
put
_shape
,
padded_out_shape
,
padded_out
put
_shape
,
padded_output
);
padded_output
);
};
};
}
else
if
(
use_neon_4x4_s2
)
{
}
else
if
(
use_neon_4x4_s2
)
{
deconv_func
=
[
=
](
const
float
*
input
,
deconv_func
=
[
=
](
const
float
*
input
,
const
float
*
filter
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
in
put
_shape
,
const
index_t
*
padded_out_shape
,
const
index_t
*
padded_out
put
_shape
,
float
*
padded_output
)
{
float
*
padded_output
)
{
Deconv2dNeonK4x4S2
(
input
,
Deconv2dNeonK4x4S2
(
input
,
filter
,
filter
,
in_shape
,
in
put
_shape
,
padded_out_shape
,
padded_out
put
_shape
,
padded_output
);
padded_output
);
};
};
}
else
{
}
else
{
deconv_func
=
[
=
](
const
float
*
input
,
deconv_func
=
[
=
](
const
float
*
input
,
const
float
*
filter
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
in
put
_shape
,
const
index_t
*
padded_out_shape
,
const
index_t
*
padded_out
put
_shape
,
float
*
padded_output
)
{
float
*
padded_output
)
{
Deconv2dGeneral
(
input
,
Deconv2dGeneral
(
input
,
filter
,
filter
,
kernel_h
,
kernel_h
,
kernel_w
,
kernel_w
,
strides_
.
data
(),
strides_
.
data
(),
in_shape
,
in
put
_shape
,
padded_out_shape
,
padded_out
put
_shape
,
padded_output
);
padded_output
);
};
};
}
}
bool
no_pad
=
bool
no_pad
=
padded_out_h
==
output_shape
[
2
]
&&
padded_out_w
==
output_shape
[
3
];
(
padded_out_shape
[
2
]
==
out_shape
[
2
])
&&
(
padded_out_shape
[
3
]
==
out_shape
[
3
]);
float
*
out_data
=
no_pad
?
output_data
:
padded_out_data
;
float
*
out_data
=
no_pad
?
output_data
:
padded_out_data
;
deconv_func
(
input_data
,
deconv_func
(
input_data
,
...
@@ -361,16 +266,16 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
...
@@ -361,16 +266,16 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
if
(
!
no_pad
)
{
if
(
!
no_pad
)
{
CropPadOut
<
float
>
(
out_data
,
CropPadOut
<
float
>
(
out_data
,
padded_out_shape
.
data
(),
padded_out_shape
.
data
(),
out
put
_shape
.
data
(),
out_shape
.
data
(),
pad_h
,
pad_h
,
pad_w
,
pad_w
,
output_data
);
output_data
);
}
}
if
(
bias_data
!=
nullptr
)
{
if
(
bias_data
!=
nullptr
)
{
const
index_t
batch
=
out
put
_shape
[
0
];
const
index_t
batch
=
out_shape
[
0
];
const
index_t
channels
=
out
put
_shape
[
1
];
const
index_t
channels
=
out_shape
[
1
];
const
index_t
img_size
=
out
put_shape
[
2
]
*
outp
ut_shape
[
3
];
const
index_t
img_size
=
out
_shape
[
2
]
*
o
ut_shape
[
3
];
#pragma omp parallel for collapse(3)
#pragma omp parallel for collapse(3)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
...
@@ -476,39 +381,46 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
...
@@ -476,39 +381,46 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
MACE_CHECK_NOTNULL
(
input
);
MACE_CHECK_NOTNULL
(
input
);
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
output
);
MACE_CHECK_NOTNULL
(
output
);
std
::
vector
<
int
>
paddings
(
2
);
std
::
vector
<
int
>
out_paddings
(
2
);
std
::
vector
<
int
>
in_paddings
(
2
,
0
);
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
out_shape
(
4
,
0
);
if
(
model_type_
==
FrameworkType
::
TENSORFLOW
)
{
if
(
model_type_
==
FrameworkType
::
TENSORFLOW
)
{
paddings
=
std
::
vector
<
int
>
(
2
,
0
);
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK_NOTNULL
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
Tensor
::
MappingGuard
output_shape_mapper
(
output_shape_tensor
);
auto
output_shape_data
=
auto
output_shape_data
=
output_shape_tensor
->
data
<
int32_t
>
();
output_shape_tensor
->
data
<
int32_t
>
();
out
put
_shape
=
out_shape
=
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
std
::
vector
<
index_t
>
(
output_shape_data
,
output_shape_data
+
4
);
CalcDeconvPaddingAndInputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
CalcDeconvShape_TF
(
strides_
.
data
(),
input
->
shape
().
data
(),
padding_type_
,
filter
->
shape
().
data
(),
output_shape
.
data
(),
out_shape
.
data
(),
paddings
.
data
());
strides_
.
data
(),
1
,
padding_type_
,
in_paddings
.
data
(),
nullptr
,
nullptr
);
}
else
{
}
else
{
out_paddings
=
paddings_
;
std
::
vector
<
int
>
out_paddings
(
2
,
0
);
paddings
=
std
::
vector
<
int
>
(
2
,
0
);
if
(
!
paddings_
.
empty
())
out_paddings
=
paddings_
;
output_shape
=
std
::
vector
<
index_t
>
(
4
,
0
);
CalcDeconvShape_Caffe
(
CalcDeconvOutputSize
(
input
->
shape
().
data
(),
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
filter
->
shape
().
data
(),
strides_
.
data
(),
strides_
.
data
(),
output_shape
.
data
(),
out_paddings
.
data
(),
out_paddings
.
data
(),
1
,
paddings
.
data
());
in_paddings
.
data
(),
out_shape
.
data
(),
nullptr
);
}
}
return
kernel_
->
Compute
(
context
,
input
,
filter
,
bias
,
return
kernel_
->
Compute
(
context
,
input
,
filter
,
bias
,
strides_
.
data
(),
paddings
.
data
(),
activation_
,
strides_
.
data
(),
in_
paddings
.
data
(),
activation_
,
relux_max_limit_
,
out
put
_shape
,
output
);
relux_max_limit_
,
out_shape
,
output
);
}
}
private:
private:
...
...
mace/ops/deconv_2d.h
浏览文件 @
48e6ee96
...
@@ -15,7 +15,14 @@
...
@@ -15,7 +15,14 @@
#ifndef MACE_OPS_DECONV_2D_H_
#ifndef MACE_OPS_DECONV_2D_H_
#define MACE_OPS_DECONV_2D_H_
#define MACE_OPS_DECONV_2D_H_
#include <algorithm>
#include <string>
#include <vector>
#include "mace/core/operator.h"
#include "mace/core/types.h"
#include "mace/core/types.h"
#include "mace/ops/activation.h"
#include "mace/ops/conv_pool_2d_util.h"
namespace
mace
{
namespace
mace
{
namespace
ops
{
namespace
ops
{
...
@@ -25,6 +32,167 @@ enum FrameworkType {
...
@@ -25,6 +32,167 @@ enum FrameworkType {
CAFFE
=
1
,
CAFFE
=
1
,
};
};
class
Deconv2dOpBase
:
public
Operation
{
public:
  // Reads the deconvolution attributes common to all Deconv2d backends
  // from the op definition:
  //   strides        : repeated int, [stride_h, stride_w]
  //   padding        : optional padding-type enum, defaults to SAME
  //   padding_values : repeated int, explicit paddings (may be empty)
  //   group          : optional int, defaults to 1 (grouped deconv)
  //   framework_type : optional int, defaults to 0 — cast to
  //                    ops::FrameworkType (presumably TENSORFLOW; see enum)
  //   activation     : optional string, defaults to "NOOP"
  //   max_limit      : optional float cap for relux, defaults to 0.0f
  explicit Deconv2dOpBase(OpConstructContext *context)
      : Operation(context),
        strides_(Operation::GetRepeatedArgs<int>("strides")),
        padding_type_(static_cast<Padding>(Operation::GetOptionalArg<int>(
            "padding", static_cast<int>(SAME)))),
        paddings_(Operation::GetRepeatedArgs<int>("padding_values")),
        group_(Operation::GetOptionalArg<int>("group", 1)),
        model_type_(static_cast<ops::FrameworkType>(
            Operation::GetOptionalArg<int>("framework_type", 0))),
        activation_(ops::StringToActivationType(
            Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
  // Computes deconvolution output geometry Caffe-style, where the output
  // size is fully determined by input size, stride, kernel and explicit
  // output paddings:
  //   padded_out = (in - 1) * stride + kernel
  //   out        = padded_out - out_padding
  //
  // @param input_shape      input tensor shape (NHWC unless isNCHW)
  // @param filter_shape     filter shape, OIHW
  // @param strides          [stride_h, stride_w]
  // @param out_paddings     [pad_h, pad_w] cropped off the padded output
  // @param group            group count; output channels = O * group
  // @param in_paddings      out-param, may be nullptr: equivalent input
  //                         padding for the scatter kernels, clamped >= 0
  // @param out_shape        out-param, may be nullptr: final output shape
  // @param padded_out_shape out-param, may be nullptr: pre-crop shape
  // @param isNCHW           selects NCHW vs NHWC indexing for both the
  //                         input shape read and the output shapes written
  static void CalcDeconvShape_Caffe(const index_t *input_shape,   // NHWC
                                    const index_t *filter_shape,  // OIHW
                                    const int *strides,
                                    const int *out_paddings,
                                    const int group,
                                    int *in_paddings,
                                    index_t *out_shape,
                                    index_t *padded_out_shape,
                                    const bool isNCHW = false) {
    MACE_CHECK_NOTNULL(out_paddings);
    MACE_CHECK_NOTNULL(input_shape);
    MACE_CHECK_NOTNULL(filter_shape);
    MACE_CHECK_NOTNULL(strides);
    const index_t in_height = isNCHW ? input_shape[2] : input_shape[1];
    const index_t in_width = isNCHW ? input_shape[3] : input_shape[2];
    // Grouped deconv: each group contributes filter_shape[0] channels.
    const index_t output_channel = filter_shape[0] * group;
    const index_t kernel_h = filter_shape[2];
    const index_t kernel_w = filter_shape[3];
    index_t padded_out_height = (in_height - 1) * strides[0] + kernel_h;
    index_t padded_out_width = (in_width - 1) * strides[1] + kernel_w;
    if (in_paddings != nullptr) {
      // (kernel - 1) * 2 - out_padding, clamped to non-negative.
      in_paddings[0] = static_cast<int>((kernel_h - 1) * 2 - out_paddings[0]);
      in_paddings[1] = static_cast<int>((kernel_w - 1) * 2 - out_paddings[1]);
      in_paddings[0] = std::max<int>(0, in_paddings[0]);
      in_paddings[1] = std::max<int>(0, in_paddings[1]);
    }
    if (padded_out_shape != nullptr) {
      padded_out_shape[0] = input_shape[0];
      padded_out_shape[1] = isNCHW ? output_channel : padded_out_height;
      padded_out_shape[2] = isNCHW ? padded_out_height : padded_out_width;
      padded_out_shape[3] = isNCHW ? padded_out_width : output_channel;
    }
    if (out_shape != nullptr) {
      // Final shape = padded shape minus the cropped output paddings.
      index_t out_height = padded_out_height - out_paddings[0];
      index_t out_width = padded_out_width - out_paddings[1];
      out_shape[0] = input_shape[0];
      out_shape[1] = isNCHW ? output_channel : out_height;
      out_shape[2] = isNCHW ? out_height : out_width;
      out_shape[3] = isNCHW ? out_width : output_channel;
    }
  }
static
void
CalcDeconvShape_TF
(
const
index_t
*
input_shape
,
// NHWC
const
index_t
*
filter_shape
,
// OIHW
const
index_t
*
output_shape
,
const
int
*
strides
,
const
int
group
,
Padding
padding_type
,
int
*
in_paddings
,
int
*
out_paddings
,
index_t
*
padded_out_shape
,
const
bool
isNCHW
=
false
)
{
MACE_CHECK_NOTNULL
(
output_shape
);
MACE_CHECK_NOTNULL
(
input_shape
);
MACE_CHECK_NOTNULL
(
filter_shape
);
MACE_CHECK_NOTNULL
(
strides
);
const
index_t
in_height
=
isNCHW
?
input_shape
[
2
]
:
input_shape
[
1
];
const
index_t
in_width
=
isNCHW
?
input_shape
[
3
]
:
input_shape
[
2
];
const
index_t
out_height
=
isNCHW
?
output_shape
[
2
]
:
output_shape
[
1
];
const
index_t
out_width
=
isNCHW
?
output_shape
[
3
]
:
output_shape
[
2
];
const
index_t
extended_in_height
=
(
in_height
-
1
)
*
strides
[
0
]
+
1
;
const
index_t
extended_in_width
=
(
in_width
-
1
)
*
strides
[
1
]
+
1
;
const
index_t
kernel_h
=
filter_shape
[
2
];
const
index_t
kernel_w
=
filter_shape
[
3
];
index_t
expected_input_height
=
0
,
expected_input_width
=
0
;
switch
(
padding_type
)
{
case
VALID
:
expected_input_height
=
(
out_height
-
kernel_h
+
strides
[
0
])
/
strides
[
0
];
expected_input_width
=
(
out_width
-
kernel_w
+
strides
[
1
])
/
strides
[
1
];
break
;
case
SAME
:
expected_input_height
=
(
out_height
+
strides
[
0
]
-
1
)
/
strides
[
0
];
expected_input_width
=
(
out_width
+
strides
[
1
]
-
1
)
/
strides
[
1
];
break
;
default:
MACE_CHECK
(
false
,
"Unsupported padding type: "
,
padding_type
);
}
MACE_CHECK
(
expected_input_height
==
in_height
,
expected_input_height
,
"!="
,
in_height
);
MACE_CHECK
(
expected_input_width
==
in_width
,
expected_input_width
,
"!="
,
in_width
);
const
index_t
padded_out_height
=
(
in_height
-
1
)
*
strides
[
0
]
+
kernel_h
;
const
index_t
padded_out_width
=
(
in_width
-
1
)
*
strides
[
1
]
+
kernel_w
;
if
(
in_paddings
!=
nullptr
)
{
const
int
p_h
=
static_cast
<
int
>
(
out_height
+
kernel_h
-
1
-
extended_in_height
);
const
int
p_w
=
static_cast
<
int
>
(
out_width
+
kernel_w
-
1
-
extended_in_width
);
in_paddings
[
0
]
=
std
::
max
<
int
>
(
0
,
p_h
);
in_paddings
[
1
]
=
std
::
max
<
int
>
(
0
,
p_w
);
}
if
(
out_paddings
!=
nullptr
)
{
const
int
o_p_h
=
static_cast
<
int
>
(
padded_out_height
-
out_height
);
const
int
o_p_w
=
static_cast
<
int
>
(
padded_out_width
-
out_width
);
out_paddings
[
0
]
=
std
::
max
<
int
>
(
0
,
o_p_h
);
out_paddings
[
1
]
=
std
::
max
<
int
>
(
0
,
o_p_w
);
}
if
(
padded_out_shape
!=
nullptr
)
{
index_t
output_channel
=
filter_shape
[
0
]
*
group
;
padded_out_shape
[
0
]
=
output_shape
[
0
];
padded_out_shape
[
1
]
=
isNCHW
?
output_channel
:
padded_out_height
;
padded_out_shape
[
2
]
=
isNCHW
?
padded_out_height
:
padded_out_width
;
padded_out_shape
[
3
]
=
isNCHW
?
padded_out_width
:
output_channel
;
}
}
protected:
std
::
vector
<
int
>
strides_
;
// [stride_h, stride_w]
const
Padding
padding_type_
;
std
::
vector
<
int
>
paddings_
;
const
int
group_
;
const
FrameworkType
model_type_
;
const
ActivationType
activation_
;
const
float
relux_max_limit_
;
};
template
<
typename
T
>
template
<
typename
T
>
void
CropPadOut
(
const
T
*
input
,
void
CropPadOut
(
const
T
*
input
,
const
index_t
*
in_shape
,
const
index_t
*
in_shape
,
...
...
mace/ops/deconv_2d_test.cc
浏览文件 @
48e6ee96
...
@@ -370,9 +370,9 @@ TEST_F(Deconv2dOpTest, OPENCLSimple3X3PaddingValid_S2) {
...
@@ -370,9 +370,9 @@ TEST_F(Deconv2dOpTest, OPENCLSimple3X3PaddingValid_S2) {
namespace
{
namespace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestComplexDeconvNxN
S12
(
const
int
batch
,
void
TestComplexDeconvNxN
(
const
int
batch
,
const
std
::
vector
<
int
>
&
shape
,
const
std
::
vector
<
int
>
&
shape
,
const
int
stride
)
{
const
int
stride
)
{
testing
::
internal
::
LogToStderr
();
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
,
int
padding
)
{
Padding
type
,
int
padding
)
{
...
@@ -415,8 +415,6 @@ void TestComplexDeconvNxNS12(const int batch,
...
@@ -415,8 +415,6 @@ void TestComplexDeconvNxNS12(const int batch,
output_shape
.
push_back
(
output_channels
);
output_shape
.
push_back
(
output_channels
);
net
.
AddInputFromArray
<
D
,
int32_t
>
(
"OutputShape"
,
{
4
},
output_shape
);
net
.
AddInputFromArray
<
D
,
int32_t
>
(
"OutputShape"
,
{
4
},
output_shape
);
}
else
{
}
else
{
// out_h = (height - 1) * stride + 1 + padding - kernel_h + 1;
// out_w = (width -1) * stride + 1 + padding - kernel_w + 1;
paddings
.
push_back
(
padding
);
paddings
.
push_back
(
padding
);
paddings
.
push_back
(
padding
);
paddings
.
push_back
(
padding
);
}
}
...
@@ -497,38 +495,42 @@ void TestComplexDeconvNxNS12(const int batch,
...
@@ -497,38 +495,42 @@ void TestComplexDeconvNxNS12(const int batch,
1e-4
);
1e-4
);
};
};
for
(
int
kernel_size
:
{
3
,
4
,
5
,
7
})
{
for
(
int
kernel_size
:
{
2
,
3
,
4
,
5
,
7
})
{
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
,
-
1
);
if
(
kernel_size
>=
stride
)
{
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
SAME
,
-
1
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
,
-
1
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
,
2
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
SAME
,
-
1
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
,
3
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
,
1
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
,
2
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
,
3
);
}
}
}
}
}
}
// namespace
}
// namespace
TEST_F
(
Deconv2dOpTest
,
OPENCLAlignedDeconvNxNS12
)
{
TEST_F
(
Deconv2dOpTest
,
OPENCLAlignedDeconvNxNS12
)
{
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
1
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
1
);
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
2
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
2
);
}
}
TEST_F
(
Deconv2dOpTest
,
OPENCLAlignedDeconvNxNS34
)
{
TEST_F
(
Deconv2dOpTest
,
OPENCLAlignedDeconvNxNS34
)
{
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
3
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
3
);
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
4
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
32
,
16
,
16
,
32
},
4
);
}
}
TEST_F
(
Deconv2dOpTest
,
OPENCLUnalignedDeconvNxNS12
)
{
TEST_F
(
Deconv2dOpTest
,
OPENCLUnalignedDeconvNxNS12
)
{
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
1
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
1
);
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
2
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
2
);
}
}
TEST_F
(
Deconv2dOpTest
,
OPENCLUnalignedDeconvNxNS34
)
{
TEST_F
(
Deconv2dOpTest
,
OPENCLUnalignedDeconvNxNS34
)
{
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
3
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
3
);
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
4
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
1
,
{
17
,
113
,
5
,
7
},
4
);
}
}
TEST_F
(
Deconv2dOpTest
,
OPENCLUnalignedDeconvNxNMultiBatch
)
{
TEST_F
(
Deconv2dOpTest
,
OPENCLUnalignedDeconvNxNMultiBatch
)
{
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
3
,
{
17
,
113
,
5
,
7
},
1
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
3
,
{
17
,
113
,
5
,
7
},
1
);
TestComplexDeconvNxN
S12
<
DeviceType
::
GPU
,
float
>
(
5
,
{
17
,
113
,
5
,
7
},
2
);
TestComplexDeconvNxN
<
DeviceType
::
GPU
,
float
>
(
5
,
{
17
,
113
,
5
,
7
},
2
);
}
}
}
// namespace test
}
// namespace test
...
...
mace/ops/depthwise_deconv2d.cc
浏览文件 @
48e6ee96
...
@@ -24,11 +24,8 @@
...
@@ -24,11 +24,8 @@
#include <vector>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/operator.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
#include "mace/ops/activation.h"
#include "mace/ops/arm/depthwise_deconv2d_neon.h"
#include "mace/ops/arm/depthwise_deconv2d_neon.h"
#include "mace/ops/conv_pool_2d_util.h"
#include "mace/utils/utils.h"
#include "mace/utils/utils.h"
#include "mace/public/mace.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
...
@@ -38,90 +35,15 @@
...
@@ -38,90 +35,15 @@
namespace
mace
{
namespace
mace
{
namespace
ops
{
namespace
ops
{
// Base class shared by the depthwise/group deconvolution ops: parses the
// operator arguments common to the CPU and GPU implementations (strides,
// paddings, group, activation) and provides the Caffe-style output-size
// computation used by both.
class DepthwiseDeconv2dOpBase : public Operation {
 public:
  // Reads the op's attributes from the construct context. "group" defaults
  // to 1 and "activation" defaults to "NOOP" (no activation applied);
  // "max_limit" (default 0.0f) is only meaningful for RELUX-style
  // activations.
  explicit DepthwiseDeconv2dOpBase(OpConstructContext *context)
      : Operation(context),
        strides_(Operation::GetRepeatedArgs<int>("strides")),
        paddings_(Operation::GetRepeatedArgs<int>("padding_values")),
        group_(Operation::GetOptionalArg<int>("group", 1)),
        activation_(ops::StringToActivationType(
            Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
        relux_max_limit_(
            Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}

  // Computes the output shape, the padded (uncropped) output shape, and the
  // input-side paddings of a grouped deconvolution, Caffe-style: the caller
  // supplies the output-side paddings in `paddings` and the rest is derived.
  // Each of `pre_paddings`, `out_shape` and `padded_out_shape` may be null
  // when that result is not needed. `isNCHW` selects the layout of
  // `input_shape` and of the emitted shapes (NHWC by default).
  static void CalcGroupDeconvOutputSize(
      const index_t *input_shape,   // NHWC
      const index_t *filter_shape,  // OIHW
      const int group,
      const int *strides,
      const int *paddings,
      int *pre_paddings,
      index_t *out_shape,
      index_t *padded_out_shape,
      const bool isNCHW = false) {
    MACE_CHECK_NOTNULL(paddings);
    MACE_CHECK_NOTNULL(input_shape);
    MACE_CHECK_NOTNULL(filter_shape);
    MACE_CHECK_NOTNULL(strides);
    // Spatial dims sit at different indices depending on the layout.
    const index_t in_height = isNCHW ? input_shape[2] : input_shape[1];
    const index_t in_width = isNCHW ? input_shape[3] : input_shape[2];
    // Output channels = filter O-dim times the group count.
    const index_t output_channel = filter_shape[0] * group;
    const index_t kernel_h = filter_shape[2];
    const index_t kernel_w = filter_shape[3];
    // Full (uncropped) spatial extent of the transposed convolution.
    index_t padded_out_height = (in_height - 1) * strides[0] + kernel_h;
    index_t padded_out_width = (in_width - 1) * strides[1] + kernel_w;
    if (pre_paddings != nullptr) {
      // Input-side padding derived from kernel size minus the requested
      // output cropping, clamped at zero.
      pre_paddings[0] = static_cast<int>((kernel_h - 1) * 2 - paddings[0]);
      pre_paddings[1] = static_cast<int>((kernel_w - 1) * 2 - paddings[1]);
      pre_paddings[0] = std::max<int>(0, pre_paddings[0]);
      pre_paddings[1] = std::max<int>(0, pre_paddings[1]);
    }
    if (padded_out_shape != nullptr) {
      padded_out_shape[0] = input_shape[0];
      padded_out_shape[1] = isNCHW ? output_channel : padded_out_height;
      padded_out_shape[2] = isNCHW ? padded_out_height : padded_out_width;
      padded_out_shape[3] = isNCHW ? padded_out_width : output_channel;
    }
    if (out_shape != nullptr) {
      // Final shape is the full extent cropped by the output paddings.
      index_t out_height = padded_out_height - paddings[0];
      index_t out_width = padded_out_width - paddings[1];
      out_shape[0] = input_shape[0];
      out_shape[1] = isNCHW ? output_channel : out_height;
      out_shape[2] = isNCHW ? out_height : out_width;
      out_shape[3] = isNCHW ? out_width : output_channel;
    }
  }

 protected:
  std::vector<int> strides_;  // [stride_h, stride_w]
  std::vector<int> paddings_;  // output-side paddings [pad_h, pad_w]; may be empty
  const int group_;  // deconvolution group count
  const ActivationType activation_;  // fused activation applied after deconv
  const float relux_max_limit_;  // upper clamp for RELUX-style activations
};
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
DepthwiseDeconv2dOp
;
class
DepthwiseDeconv2dOp
;
template
<
>
template
<
>
class
DepthwiseDeconv2dOp
<
DeviceType
::
CPU
,
float
>
class
DepthwiseDeconv2dOp
<
DeviceType
::
CPU
,
float
>
:
public
De
pthwiseDe
conv2dOpBase
{
:
public
Deconv2dOpBase
{
public:
public:
explicit
DepthwiseDeconv2dOp
(
OpConstructContext
*
context
)
explicit
DepthwiseDeconv2dOp
(
OpConstructContext
*
context
)
:
De
pthwiseDe
conv2dOpBase
(
context
)
{}
:
Deconv2dOpBase
(
context
)
{}
MaceStatus
Run
(
OpContext
*
context
)
override
{
MaceStatus
Run
(
OpContext
*
context
)
override
{
const
Tensor
*
input
=
this
->
Input
(
0
);
const
Tensor
*
input
=
this
->
Input
(
0
);
...
@@ -138,15 +60,17 @@ class DepthwiseDeconv2dOp<DeviceType::CPU, float>
...
@@ -138,15 +60,17 @@ class DepthwiseDeconv2dOp<DeviceType::CPU, float>
std
::
vector
<
index_t
>
padded_out_shape
(
4
,
0
);
std
::
vector
<
index_t
>
padded_out_shape
(
4
,
0
);
if
(
!
paddings_
.
empty
())
out_paddings
=
paddings_
;
if
(
!
paddings_
.
empty
())
out_paddings
=
paddings_
;
CalcGroupDeconvOutputSize
(
input
->
shape
().
data
(),
CalcDeconvShape_Caffe
(
filter
->
shape
().
data
(),
input
->
shape
().
data
(),
group_
,
filter
->
shape
().
data
(),
strides_
.
data
(),
strides_
.
data
(),
out_paddings
.
data
(),
out_paddings
.
data
(),
nullptr
,
group_
,
out_shape
.
data
(),
nullptr
,
padded_out_shape
.
data
(),
out_shape
.
data
(),
true
);
padded_out_shape
.
data
(),
true
);
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
out_shape
));
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
out_shape
));
output
->
Clear
();
output
->
Clear
();
index_t
kernel_h
=
filter
->
dim
(
2
);
index_t
kernel_h
=
filter
->
dim
(
2
);
...
@@ -480,10 +404,10 @@ class DepthwiseDeconv2dOp<DeviceType::CPU, float>
...
@@ -480,10 +404,10 @@ class DepthwiseDeconv2dOp<DeviceType::CPU, float>
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
template
<
typename
T
>
template
<
typename
T
>
class
DepthwiseDeconv2dOp
<
DeviceType
::
GPU
,
T
>
:
public
De
pthwiseDe
conv2dOpBase
{
class
DepthwiseDeconv2dOp
<
DeviceType
::
GPU
,
T
>
:
public
Deconv2dOpBase
{
public:
public:
explicit
DepthwiseDeconv2dOp
(
OpConstructContext
*
context
)
explicit
DepthwiseDeconv2dOp
(
OpConstructContext
*
context
)
:
De
pthwiseDe
conv2dOpBase
(
context
)
{
:
Deconv2dOpBase
(
context
)
{
if
(
context
->
device
()
->
opencl_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
opencl_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
DepthwiseDeconv2dKernel
<
T
>
);
kernel_
.
reset
(
new
opencl
::
image
::
DepthwiseDeconv2dKernel
<
T
>
);
}
else
{
}
else
{
...
@@ -501,16 +425,18 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public DepthwiseDeconv2dOpBase {
...
@@ -501,16 +425,18 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public DepthwiseDeconv2dOpBase {
MACE_CHECK_NOTNULL
(
output
);
MACE_CHECK_NOTNULL
(
output
);
std
::
vector
<
int
>
in_paddings
(
2
,
0
);
std
::
vector
<
int
>
in_paddings
(
2
,
0
);
std
::
vector
<
int
>
out_paddings
(
2
,
0
);
std
::
vector
<
index_t
>
out_shape
(
4
,
0
);
std
::
vector
<
index_t
>
out_shape
(
4
,
0
);
CalcGroupDeconvOutputSize
(
input
->
shape
().
data
(),
if
(
!
paddings_
.
empty
())
out_paddings
=
paddings_
;
filter
->
shape
().
data
(),
CalcDeconvShape_Caffe
(
input
->
shape
().
data
(),
group_
,
filter
->
shape
().
data
(),
strides_
.
data
(),
strides_
.
data
(),
paddings_
.
data
(),
out_paddings
.
data
(),
in_paddings
.
data
(),
group_
,
out_shape
.
data
(),
in_paddings
.
data
(),
nullptr
);
out_shape
.
data
(),
nullptr
);
return
kernel_
->
Compute
(
context
,
return
kernel_
->
Compute
(
context
,
input
,
input
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录