Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
b14e21c3
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b14e21c3
编写于
1月 16, 2020
作者:
Z
zhupengyang
提交者:
GitHub
1月 16, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] enhance conv_transpose and ut (#2773)
上级
74f4a312
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
403 addition
and
416 deletion
+403
-416
lite/kernels/npu/bridges/conv_transpose_op.cc
lite/kernels/npu/bridges/conv_transpose_op.cc
+57
-15
lite/kernels/npu/bridges/conv_transpose_op_test.cc
lite/kernels/npu/bridges/conv_transpose_op_test.cc
+0
-372
lite/operators/conv_transpose_op.cc
lite/operators/conv_transpose_op.cc
+3
-28
lite/tests/kernels/CMakeLists.txt
lite/tests/kernels/CMakeLists.txt
+2
-0
lite/tests/kernels/conv_transpose_compute_test.cc
lite/tests/kernels/conv_transpose_compute_test.cc
+341
-0
lite/tests/utils/naive_math_impl.h
lite/tests/utils/naive_math_impl.h
+0
-1
未找到文件。
lite/kernels/npu/bridges/conv_transpose_op.cc
浏览文件 @
b14e21c3
...
...
@@ -15,6 +15,7 @@
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
#include "lite/operators/conv_op.h"
namespace
paddle
{
namespace
lite
{
...
...
@@ -38,6 +39,7 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
input
=
scope
->
FindMutableTensor
(
input_name
);
auto
input_dims
=
input
->
dims
();
CHECK_EQ
(
input_dims
.
size
(),
4
);
auto
filter_name
=
op_info
->
Input
(
"Filter"
).
front
();
auto
filter_type
=
kernel
->
GetInputDeclType
(
"Filter"
);
CHECK
(
filter_type
->
precision
()
==
PRECISION
(
kFloat
));
...
...
@@ -45,18 +47,53 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
filter
=
scope
->
FindMutableTensor
(
filter_name
);
auto
filter_dims
=
filter
->
dims
();
CHECK_EQ
(
filter_dims
.
size
(),
4
);
auto
output_name
=
op_info
->
Output
(
"Output"
).
front
();
auto
output_type
=
kernel
->
GetOutputDeclType
(
"Output"
);
CHECK
(
output_type
->
precision
()
==
PRECISION
(
kFloat
));
CHECK
(
output_type
->
layout
()
==
DATALAYOUT
(
kNCHW
));
auto
strides
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"strides"
);
auto
paddings
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"paddings"
);
CHECK_EQ
(
strides
.
size
(),
2L
);
auto
groups
=
op_info
->
GetAttr
<
int
>
(
"groups"
);
auto
dilations
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"dilations"
);
if
(
groups
>
1
)
{
LOG
(
WARNING
)
<<
"[NPU] only support groups == 1"
;
return
FAILED
;
}
auto
fuse_relu
=
op_info
->
HasAttr
(
"fuse_relu"
)
&&
op_info
->
GetAttr
<
bool
>
(
"fuse_relu"
);
CHECK_EQ
(
strides
.
size
(),
2L
);
std
::
vector
<
int
>
output_size
;
if
(
op_info
->
HasAttr
(
"output_size"
))
{
output_size
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"output_size"
);
}
auto
paddings
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"paddings"
);
auto
dilations
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"dilations"
);
CHECK_EQ
(
dilations
.
size
(),
2L
);
std
::
string
padding_algorithm
=
op_info
->
HasAttr
(
"padding_algorithm"
)
?
op_info
->
GetAttr
<
std
::
string
>
(
"padding_algorithm"
)
:
""
;
if
(
paddings
.
size
()
==
2L
)
{
for
(
size_t
i
=
0
;
i
<
2L
;
++
i
)
{
int
copy_pad
=
*
(
paddings
.
begin
()
+
2
*
i
);
paddings
.
insert
(
paddings
.
begin
()
+
2
*
i
+
1
,
copy_pad
);
}
}
CHECK_EQ
(
paddings
.
size
(),
4L
)
<<
"[NPU] Paddings size should be the same or twice as the input size."
;
operators
::
UpdatePaddingAndDilation
(
&
paddings
,
&
dilations
,
strides
,
padding_algorithm
,
input_dims
,
filter_dims
);
if
(
paddings
[
0
]
!=
paddings
[
1
]
||
paddings
[
2
]
!=
paddings
[
3
])
{
LOG
(
WARNING
)
<<
"[NPU] only support
\"
pad_top == pad_bottom && pad_left == "
"pad_right
\"
."
;
return
FAILED
;
}
// Input node
std
::
shared_ptr
<
Node
>
input_node
=
nullptr
;
...
...
@@ -67,23 +104,23 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
// Create input sizes node to describe the dimensions of input tensor
if
(
paddings
.
size
()
==
2L
)
{
for
(
size_t
i
=
0
;
i
<
2L
;
++
i
)
{
int
copy_pad
=
*
(
paddings
.
begin
()
+
2
*
i
);
paddings
.
insert
(
paddings
.
begin
()
+
2
*
i
+
1
,
copy_pad
);
}
}
CHECK_EQ
(
paddings
.
size
(),
4L
)
<<
"[NPU] Paddings size should be the same or twice as the input size."
;
std
::
vector
<
int32_t
>
input_sizes
;
input_sizes
.
push_back
(
input_dims
[
0
]);
input_sizes
.
push_back
(
filter_dims
[
1
]
*
groups
);
for
(
int
i
=
0
;
i
<
strides
.
size
();
i
++
)
{
int
kernel_ext
=
dilations
[
i
]
*
(
filter_dims
[
i
+
2
]
-
1
)
+
1
;
int
output_size
=
(
input_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
kernel_ext
-
2
*
paddings
[
i
];
int
output_size
=
(
input_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
kernel_ext
-
paddings
[
i
*
2
]
-
paddings
[
i
*
2
+
1
];
input_sizes
.
push_back
(
output_size
);
}
if
(
!
output_size
.
empty
())
{
CHECK_EQ
(
output_size
.
size
(),
2L
);
if
(
output_size
[
0
]
!=
input_sizes
[
2
]
||
output_size
[
1
]
!=
input_sizes
[
3
])
{
LOG
(
WARNING
)
<<
"[NPU] not support output_size: "
<<
output_size
[
0
]
<<
", "
<<
output_size
[
1
];
return
FAILED
;
}
}
auto
input_sizes_node
=
graph
->
Add
(
output_name
+
"/input_sizes"
,
input_sizes
);
// Filter node
...
...
@@ -96,8 +133,13 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
conv_transpose_op
->
set_input_filter
(
*
filter_node
->
data
());
conv_transpose_op
->
set_input_x
(
*
input_node
->
data
());
// Set attributes
conv_transpose_op
->
set_attr_format
(
0
);
// NCHW
conv_transpose_op
->
set_attr_pad_mode
(
0
);
// NOTSET
conv_transpose_op
->
set_attr_format
(
0
);
// NCHW
// "SAME" is different from paddle
if
(
padding_algorithm
==
"VALID"
)
{
conv_transpose_op
->
set_attr_pad_mode
(
5
);
}
else
{
conv_transpose_op
->
set_attr_pad_mode
(
0
);
// NOTSET
}
conv_transpose_op
->
set_attr_group
(
groups
);
conv_transpose_op
->
set_attr_pad
(
ge
::
AttrValue
::
LIST_INT
(
{
paddings
[
0
],
paddings
[
1
],
paddings
[
2
],
paddings
[
3
]}));
...
...
lite/kernels/npu/bridges/conv_transpose_op_test.cc
已删除
100644 → 0
浏览文件 @
74f4a312
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/conv_transpose_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
npu
{
namespace
bridges
{
// Adds a per-channel bias to a channel-major buffer, in place, and optionally
// applies ReLU to the result.
//
// data:         buffer holding channel_size * inner_size elements; each
//               channel occupies inner_size contiguous elements.
// bias:         one value per channel, or nullptr to add zero.
// channel_size: number of channels to process.
// inner_size:   number of elements per channel.
// has_relu:     when true, results that are not greater than zero become zero.
template <typename DType>
void add_bias_with_relu(DType* data,
                        const DType* bias,
                        int channel_size,
                        int inner_size,
                        bool has_relu) {
  const DType zero = static_cast<DType>(0);
  for (int c = 0; c < channel_size; ++c) {
    const DType bias_val = bias != nullptr ? bias[c] : zero;
    for (int i = 0; i < inner_size; ++i) {
      DType data_val = data[i];
      data_val += bias_val;
      // Clamp in DType. A float literal in a conditional expression would
      // convert integral values to float and back, losing precision.
      if (has_relu && !(data_val > zero)) {
        data_val = zero;
      }
      data[i] = data_val;
    }
    // Step to the next channel's plane.
    data += inner_size;
  }
}
// Scatters a column buffer back into an image, summing every column entry
// into the image pixel it maps to.
//
// data_col is consumed sequentially in the order
// [channel][kernel_row][kernel_col][col_row][col_col]; entries that map
// outside the image (the padded border) are skipped. data_im must hold
// channel_size * height * width elements and is zeroed before accumulation.
template <typename DType>
void col2im(const DType* data_col,
            const int channel_size,
            const int height,
            const int width,
            const int kernel_h,
            const int kernel_w,
            const int pad_h,
            const int pad_w,
            const int stride_h,
            const int stride_w,
            const int dilation_h,
            const int dilation_w,
            DType* data_im) {
  memset(data_im, 0, height * width * channel_size * sizeof(DType));

  // Extent of the dilated kernel and the resulting column grid size.
  const int ext_h = dilation_h * (kernel_h - 1) + 1;
  const int ext_w = dilation_w * (kernel_w - 1) + 1;
  const int col_rows = (height + 2 * pad_h - ext_h) / stride_h + 1;
  const int col_cols = (width + 2 * pad_w - ext_w) / stride_w + 1;
  const int plane_size = height * width;

  const DType* src = data_col;
  DType* plane = data_im;
  for (int channels_left = channel_size; channels_left--;
       plane += plane_size) {
    for (int kh = 0; kh < kernel_h; kh++) {
      for (int kw = 0; kw < kernel_w; kw++) {
        int im_row = kh * dilation_h - pad_h;
        for (int rows_left = col_rows; rows_left != 0;
             --rows_left, im_row += stride_h) {
          if (im_row < 0 || im_row >= height) {
            // The whole column row falls into the padding; skip it.
            src += col_cols;
            continue;
          }
          DType* im_line = plane + im_row * width;
          int im_col = kw * dilation_w - pad_w;
          for (int cols_left = col_cols; cols_left != 0;
               --cols_left, ++src, im_col += stride_w) {
            if (im_col >= 0 && im_col < width) {
              im_line[im_col] += *src;
            }
          }
        }
      }
    }
  }
}
// Naive row-major matrix multiply: C = alpha * op(A) * op(B) + beta * C.
//
// M, N, K:    op(A) is M x K, op(B) is K x N, C is M x N.
// A, B:       input matrices; when is_trans_A / is_trans_B is set the
//             corresponding matrix is stored transposed (A as K x M,
//             B as N x K).
// C:          output matrix, updated in place.
// alpha/beta: scaling factors. When beta is zero, C is treated as write-only
//             and its previous contents (possibly uninitialised or NaN) are
//             never read.
template <typename IType, typename OType>
void gemm(int M,
          int N,
          int K,
          const IType* A,
          const IType* B,
          OType* C,
          OType alpha,
          OType beta,
          bool is_trans_A = false,
          bool is_trans_B = false) {
  // Follow the BLAS convention: with beta == 0 the old C must not contribute,
  // otherwise 0 * NaN or 0 * Inf would leak into the result.
  const bool overwrite_c = (beta == static_cast<OType>(0));
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      OType sum = static_cast<OType>(0);
      for (int k = 0; k < K; ++k) {
        const IType a = is_trans_A ? A[k * M + m] : A[m * K + k];
        const IType b = is_trans_B ? B[n * K + k] : B[k * N + n];
        sum += a * b;
      }
      OType& c = C[m * N + n];
      c = overwrite_c ? alpha * sum : alpha * sum + beta * c;
    }
  }
}
// Reference implementation of conv2d_transpose used to validate the op.
//
// Reads the "Input", "Filter" and optional "Bias" tensors and the "strides",
// "paddings", "groups", "dilations" and "fuse_relu" attributes from the op,
// and writes the result into the op's "Output" tensor, whose dims must
// already be set. For every batch item it runs one GEMM per group
// (filter^T x input) into a column buffer, scatters the columns into the
// output with col2im, then adds the bias and optionally applies ReLU.
template <typename IType, typename OType>
void conv_transpose_ref(
    const std::shared_ptr<operators::ConvTransposeOpLite> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto input =
      scope->FindVar(op_info->Input("Input").front())->GetMutable<Tensor>();
  auto filter =
      scope->FindVar(op_info->Input("Filter").front())->GetMutable<Tensor>();
  auto output =
      scope->FindVar(op_info->Output("Output").front())->GetMutable<Tensor>();
  std::vector<int32_t> strides =
      op_info->GetAttr<std::vector<int32_t>>("strides");
  std::vector<int32_t> paddings =
      op_info->GetAttr<std::vector<int32_t>>("paddings");
  int32_t groups = op_info->GetAttr<int32_t>("groups");
  std::vector<int32_t> dilations =
      op_info->GetAttr<std::vector<int32_t>>("dilations");
  bool fuse_relu = op_info->GetAttr<bool>("fuse_relu");

  // Bias is optional: bias_data stays null when the op has no "Bias" input.
  Tensor* bias = nullptr;
  OType* bias_data = nullptr;
  if (op_info->HasInput("Bias")) {
    auto bias_var_names = op_info->Input("Bias");
    if (bias_var_names.size() > 0) {
      auto bias_var_name = bias_var_names.front();
      bias = scope->FindVar(bias_var_name)->GetMutable<Tensor>();
      bias_data = bias->mutable_data<OType>();
    }
  }

  auto input_dims = input->dims();
  auto filter_dims = filter->dims();
  auto output_dims = output->dims();
  auto input_data = input->mutable_data<IType>();
  auto filter_data = filter->mutable_data<IType>();
  auto output_data = output->mutable_data<OType>();

  int kernel_w = filter_dims[3];
  int kernel_h = filter_dims[2];
  int stride_w = strides[1];
  int stride_h = strides[0];
  int dila_w = dilations[1];
  int dila_h = dilations[0];
  // A 2-element list is {pad_h, pad_w}. A 4-element list is presumably
  // {top, bottom, left, right} (TODO confirm against the op definition), so
  // the horizontal padding sits at index 2. col2im takes one pad per axis,
  // so only symmetric padding is modelled here.
  int pad_h = paddings[0];
  int pad_w = paddings.size() == 4 ? paddings[2] : paddings[1];
  int batch_size = input_dims[0];
  int in_ch_size = input_dims[1];
  int in_h = input_dims[2];
  int in_w = input_dims[3];
  int out_ch_size = output_dims[1];
  int out_h = output_dims[2];
  int out_w = output_dims[3];

  // Per-group GEMM shape: (M x K) * (K x N) -> (M x N).
  int M = out_ch_size * kernel_w * kernel_h / groups;
  int N = in_h * in_w;
  int K = in_ch_size / groups;
  if (in_ch_size != out_ch_size || groups != in_ch_size) {
    CHECK_EQ(in_ch_size % groups, 0);
    CHECK_EQ(out_ch_size % groups, 0);
  }

  auto workspace = std::vector<OType>(groups * M * N);
  int group_input_size = in_w * in_h * in_ch_size / groups;
  int group_col_size = M * N;
  int group_filter_size =
      in_ch_size * out_ch_size * kernel_w * kernel_h / (groups * groups);

  // With a 1x1 kernel, unit stride/dilation and no padding the column buffer
  // has exactly the output layout, so the GEMM can write the output directly.
  // The padding must be zero: with any padding the output is smaller than
  // the M * N elements the GEMM produces and would be overrun.
  bool flag_1x1s1p0 = (kernel_w == 1) && (kernel_h == 1) && (stride_h == 1) &&
                      (stride_w == 1) && (pad_w == 0) && (pad_h == 0) &&
                      (dila_w == 1) && (dila_h == 1);

  const int batch_input_size = in_ch_size * in_h * in_w;
  const int batch_output_size = out_ch_size * out_h * out_w;
  for (int n = 0; n < batch_size; ++n) {
    // Derive each batch's pointers from the base; do not accumulate offsets.
    auto batch_input = input_data + n * batch_input_size;
    auto batch_output = output_data + n * batch_output_size;

    auto col_data = workspace.data();
    if (flag_1x1s1p0) {
      col_data = batch_output;
    }
    // Clear the column data of every group, not only the first one.
    memset(col_data, 0, sizeof(OType) * groups * group_col_size);
    for (int g = 0; g < groups; ++g) {
      auto input_group_data = batch_input + g * group_input_size;
      auto filter_group_data = filter_data + g * group_filter_size;
      auto col_group_data = col_data + g * group_col_size;
      gemm<IType, OType>(M,
                         N,
                         K,
                         filter_group_data,
                         input_group_data,
                         col_group_data,
                         static_cast<OType>(1),
                         static_cast<OType>(0),
                         true,
                         false);
    }
    if (!flag_1x1s1p0) {
      col2im(col_data,
             out_ch_size,
             out_h,
             out_w,
             kernel_h,
             kernel_w,
             pad_h,
             pad_w,
             stride_h,
             stride_w,
             dila_h,
             dila_w,
             batch_output);
    }
    add_bias_with_relu(
        batch_output, bias_data, out_ch_size, out_w * out_h, fuse_relu);
  }
}
// Runs one conv2d_transpose configuration through LauchOp and through the
// reference implementation, and checks that both outputs agree element-wise.
//
// bs/ic/ih/iw describe the input, `filters` is the second dimension of the
// filter, `kernel` its square spatial size; dilation, stride and padding are
// applied identically on both spatial axes.
void test_conv_transpose(int bs,
                         int ic,
                         int ih,
                         int iw,
                         bool has_bias,
                         bool fuse_relu,
                         int filters,
                         int groups,
                         int dilation,
                         int stride,
                         int padding,
                         int kernel) {
  // Variables living in the scope.
  Scope scope;
  const std::string input_var_name("input");
  const std::string filter_var_name("filter");
  const std::string bias_var_name("bias");
  const std::string output_var_name("output");
  const std::string output_ref_var_name("output_ref");
  auto* input = scope.Var(input_var_name)->GetMutable<Tensor>();
  auto* filter = scope.Var(filter_var_name)->GetMutable<Tensor>();
  auto* bias = scope.Var(bias_var_name)->GetMutable<Tensor>();
  auto* output = scope.Var(output_var_name)->GetMutable<Tensor>();
  auto* output_ref = scope.Var(output_ref_var_name)->GetMutable<Tensor>();

  // Shape and fill the input and the filter.
  const std::vector<int64_t> input_shape{bs, ic, ih, iw};
  const std::vector<int64_t> filter_shape{ic, filters, kernel, kernel};
  input->Resize(input_shape);
  filter->Resize(filter_shape);
  FillTensor<float, int>(input);
  FillTensor<float, int>(filter);

  // Describe the op.
  cpp::OpDesc opdesc;
  opdesc.SetType("conv2d_transpose");
  opdesc.SetInput("Input", {input_var_name});
  opdesc.SetInput("Filter", {filter_var_name});
  opdesc.SetOutput("Output", {output_var_name});
  opdesc.SetAttr("dilations", std::vector<int32_t>(2, dilation));
  opdesc.SetAttr("strides", std::vector<int32_t>(2, stride));
  opdesc.SetAttr("paddings", std::vector<int32_t>(4, padding));
  opdesc.SetAttr("groups", groups);
  opdesc.SetAttr("fuse_relu", fuse_relu);
  if (has_bias) {
    bias->Resize({1, filters * groups, 1, 1});
    FillTensor<float, int>(bias);
    opdesc.SetInput("Bias", {bias_var_name});
  }

  // Create the op, launch it, and keep a copy of what it produced.
  auto op = CreateOp<operators::ConvTransposeOpLite>(opdesc, &scope);
  LauchOp(op, {input_var_name}, {output_var_name});
  output_ref->CopyDataFrom(*output);

  // The reference implementation overwrites the "output" tensor.
  conv_transpose_ref<float, float>(op);

  // Compare the reference (now in `output`) with the saved result.
  auto* output_data = output->mutable_data<float>();
  auto* output_ref_data = output_ref->mutable_data<float>();
  const auto element_count = output->dims().production();
  for (int i = 0; i < element_count; ++i) {
    VLOG(5) << i;
    EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
  }
}
// Sweeps conv2d_transpose over batch size, channels, spatial size, bias,
// fused ReLU, filter count, dilation, stride, kernel size and padding.
// Flip the `#if` to run a single fixed configuration instead.
TEST(NPUBridges, conv_transpose) {
#if 1
  for (int bs : {1, 2}) {
    for (int ic : {3, 6}) {
      for (int ih : {14, 28}) {
        for (int iw : {14, 28}) {
          for (bool has_bias : {false, true}) {
            for (bool fuse_relu : {false, true}) {
              for (int filters : {1, 2, 5}) {
                // Groups 2 and 5 are disabled in this sweep.
                for (int groups : {1}) {
                  for (int dilation : {1, 2}) {
                    for (int stride : {1, 2}) {
                      for (int kernel : {1, 3, 5}) {
                        // Try "half kernel" padding, plus zero padding when
                        // that is a different value.
                        const int half_kernel = kernel / 2;
                        std::vector<int> paddings(1, half_kernel);
                        if (half_kernel != 0) {
                          paddings.push_back(0);
                        }
                        for (int padding : paddings) {
                          VLOG(3) << "bs: " << bs << " ic: " << ic
                                  << " ih: " << ih << " iw: " << iw
                                  << " has_bias: " << has_bias
                                  << " fuse_relu: " << fuse_relu
                                  << " filters: " << filters
                                  << " groups: " << groups
                                  << " dilation: " << dilation
                                  << " stride: " << stride
                                  << " padding: " << padding
                                  << " kernel: " << kernel;
                          test_conv_transpose(bs,
                                              ic,
                                              ih,
                                              iw,
                                              has_bias,
                                              fuse_relu,
                                              filters,
                                              groups,
                                              dilation,
                                              stride,
                                              padding,
                                              kernel);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
#else
  test_conv_transpose(1, 6, 8, 8, false, false, 5, 2, 1, 1, 1, 3);
#endif
}
}
// namespace bridges
}
// namespace npu
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_OP
(
conv2d_transpose
);
USE_NPU_BRIDGE
(
conv2d_transpose
);
lite/operators/conv_transpose_op.cc
浏览文件 @
b14e21c3
...
...
@@ -11,10 +11,12 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/conv_transpose_op.h"
#include <memory>
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/operators/conv_op.h"
namespace
paddle
{
namespace
lite
{
...
...
@@ -50,34 +52,6 @@ inline int ConvTransposeOutputSize(int input_size,
return
output_size
;
}
// Rewrites `paddings` (and `dilations`) in place according to
// `padding_algorithm`.
//
//  - "VALID": every padding entry becomes 0.
//  - "SAME":  for each spatial axis i, the padding pair at [2*i, 2*i + 1] is
//             recomputed from the input size, stride and kernel size, and
//             dilations[i] is reset to 1.
//  - anything else: nothing is modified.
//
// data_dims and ksize are indexed at [i + 2], i.e. the spatial axes are
// expected to start at index 2.
inline void UpdatePaddingAndDilation(std::vector<int>* paddings,
                                     std::vector<int>* dilations,
                                     const std::vector<int>& strides,
                                     const std::string padding_algorithm,
                                     const lite::DDim data_dims,
                                     const lite::DDim& ksize) {
  if (padding_algorithm == "VALID") {
    for (size_t idx = 0; idx < paddings->size(); ++idx) {
      (*paddings)[idx] = 0;
    }
    return;
  }
  if (padding_algorithm != "SAME") {
    return;
  }

  for (size_t axis = 0; axis < strides.size(); ++axis) {
    const int stride = strides[axis];
    // Ceil-divide the input extent by the stride.
    const int out_size = (data_dims[axis + 2] + stride - 1) / stride;
    // Total padding needed on this axis, never negative.
    const int pad_sum = std::max(
        (out_size - 1) * stride + ksize[axis + 2] - data_dims[axis + 2],
        static_cast<int64_t>(0));
    const int pad_before = pad_sum / 2;
    const int pad_after = pad_sum - pad_before;
    (*paddings)[axis * 2] = pad_before;
    (*paddings)[axis * 2 + 1] = pad_after;
    (*dilations)[axis] = 1;
  }
}
bool
ConvTransposeOpLite
::
InferShape
()
const
{
const
auto
in_dims
=
param_
.
x
->
dims
();
const
auto
filter_dims
=
param_
.
filter
->
dims
();
...
...
@@ -169,6 +143,7 @@ bool ConvTransposeOpLite::AttachImpl(const cpp::OpDesc& op_desc,
}
if
(
op_desc
.
HasAttr
(
"fuse_relu"
))
{
param_
.
fuse_relu
=
op_desc
.
GetAttr
<
bool
>
(
"fuse_relu"
);
param_
.
activation_param
.
active_type
=
lite_api
::
ActivationType
::
kRelu
;
}
if
(
op_desc
.
HasAttr
(
"output_size"
))
{
param_
.
output_size
=
op_desc
.
GetAttr
<
std
::
vector
<
int
>>
(
"output_size"
);
...
...
lite/tests/kernels/CMakeLists.txt
浏览文件 @
b14e21c3
if
((
NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM
)
AND
(
LITE_WITH_X86 OR LITE_WITH_ARM
))
lite_cc_test
(
test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
...
...
lite/tests/kernels/conv_transpose_compute_test.cc
0 → 100644
浏览文件 @
b14e21c3
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
#include "lite/tests/utils/naive_math_impl.h"
namespace
paddle
{
namespace
lite
{
// Arena test case for the conv2d_transpose op. It describes the op
// (PrepareOpDesc), creates random input, filter and optional bias tensors
// (PrepareData), and computes the expected output with deconv_basic
// (RunBaseline).
class ConvTransposeComputeTester : public arena::TestCase {
 protected:
  // common attributes for this op.
  std::string op_type_ = "conv2d_transpose";
  std::string input_ = "input";
  std::string filter_ = "filter";
  std::string output_ = "output";
  DDim dims_;                          // input dims, indexed as 4-D below
  int filter_channels_ = 1;            // second dimension of the filter
  std::vector<int> ksize_{3, 3};       // spatial kernel size
  std::vector<int> strides_{1, 1};
  std::vector<int> paddings_{0, 0};    // 2 or 4 entries
  int groups_ = 1;
  std::vector<int> dilations_{1, 1};
  std::string padding_algorithm_ = "";  // "", "SAME" or "VALID"
  std::vector<int> output_size_{};      // optional explicit output H/W
  std::string bias_ = "";               // bias variable name, empty = no bias
  bool fuse_relu_ = false;

 public:
  ConvTransposeComputeTester(const Place& place,
                             const std::string& alias,
                             DDim dims,
                             int filter_channels = 1,
                             std::vector<int> ksize = {3, 3},
                             std::vector<int> strides = {1, 1},
                             std::vector<int> paddings = {0, 0},
                             int groups = 1,
                             std::vector<int> dilations = {1, 1},
                             std::string padding_algorithm = "",
                             std::vector<int> output_size = {},
                             std::string bias = "",
                             bool fuse_relu = false)
      : TestCase(place, alias),
        dims_(dims),
        filter_channels_(filter_channels),
        ksize_(ksize),
        strides_(strides),
        paddings_(paddings),
        groups_(groups),
        dilations_(dilations),
        padding_algorithm_(padding_algorithm),
        output_size_(output_size),
        bias_(bias),
        fuse_relu_(fuse_relu) {}

  // Computes the expected output into a new tensor named output_.
  // Note that this normalises paddings_ (and, for "SAME", dilations_) in
  // place before deriving the output shape.
  void RunBaseline(Scope* scope) override {
    // Expand {a, b} into {a, a, b, b}.
    if (paddings_.size() == 2L) {
      paddings_.insert(paddings_.begin(), paddings_[0]);
      paddings_.insert(paddings_.begin() + 2, paddings_[2]);
    }
    CHECK_EQ(paddings_.size(), 4);

    if (padding_algorithm_ == "SAME") {
      for (size_t i = 0; i < strides_.size(); ++i) {
        int out_size = (dims_[i + 2] + strides_[i] - 1) / strides_[i];
        int pad_sum = std::max(
            (out_size - 1) * strides_[i] + ksize_[i] - dims_[i + 2],
            (int64_t)0);
        int pad_0 = pad_sum / 2;
        int pad_1 = pad_sum - pad_0;
        // pad
        paddings_[i * 2] = pad_0;
        paddings_[i * 2 + 1] = pad_1;
        // dilation
        dilations_[i] = 1;
      }
    } else if (padding_algorithm_ == "VALID") {
      for (auto& it : paddings_) {
        it = 0;
      }
    }

    // Output shape: {batch, filter_channels * groups, H_out, W_out}.
    std::vector<int64_t> output_shape{dims_[0], filter_channels_ * groups_};
    for (size_t i = 0; i < strides_.size(); ++i) {
      const int dkernel = dilations_[i] * (ksize_[i] - 1) + 1;
      int output_size = (dims_[i + 2] - 1) * strides_[i] - paddings_[i * 2] -
                        paddings_[i * 2 + 1] + dkernel;
      output_shape.push_back(output_size);
    }
    // An explicit output_size overrides the computed spatial dims.
    if (!output_size_.empty()) {
      for (size_t i = 0; i < output_size_.size(); ++i) {
        output_shape[i + 2] = output_size_[i];
      }
    }
    auto output = scope->NewTensor(output_);
    output->Resize(output_shape);

    const Tensor* input = scope->FindTensor(input_);
    const Tensor* filter = scope->FindTensor(filter_);
    const Tensor* bias = scope->FindTensor(bias_);
    auto input_dims = input->dims();
    auto filter_dims = filter->dims();
    auto output_dims = output->dims();
    auto input_data = input->data<float>();
    auto filter_data = filter->data<float>();
    auto output_data = output->mutable_data<float>();
    bool flag_bias = bias != nullptr;
    const float* bias_data = flag_bias ? bias->data<float>() : nullptr;

    deconv_basic<float, float>(input_data,
                               output_data,
                               input_dims[0],
                               output_dims[1],
                               output_dims[2],
                               output_dims[3],
                               input_dims[1],
                               input_dims[2],
                               input_dims[3],
                               filter_data,
                               bias_data,
                               groups_,
                               filter_dims[3],
                               filter_dims[2],
                               strides_[1],
                               strides_[0],
                               dilations_[1],
                               dilations_[0],
                               paddings_[2],
                               paddings_[3],
                               paddings_[0],
                               paddings_[1],
                               flag_bias,
                               fuse_relu_);
  }

  // Fills in the op type, inputs, output and attributes. "Bias",
  // "padding_algorithm" and "output_size" are only set when non-empty.
  void PrepareOpDesc(cpp::OpDesc* op_desc) {
    op_desc->SetType(op_type_);
    op_desc->SetInput("Input", {input_});
    op_desc->SetInput("Filter", {filter_});
    if (!bias_.empty()) {
      op_desc->SetInput("Bias", {bias_});
    }
    op_desc->SetOutput("Output", {output_});
    op_desc->SetAttr("strides", strides_);
    op_desc->SetAttr("paddings", paddings_);
    op_desc->SetAttr("groups", groups_);
    op_desc->SetAttr("dilations", dilations_);
    if (!padding_algorithm_.empty()) {
      op_desc->SetAttr("padding_algorithm", padding_algorithm_);
    }
    if (!output_size_.empty()) {
      op_desc->SetAttr("output_size", output_size_);
    }
    op_desc->SetAttr("fuse_relu", fuse_relu_);
  }

  // Creates the input, filter and (optional) bias tensors with random
  // values in [-1, 1].
  void PrepareData() override {
    std::vector<float> din(dims_.production());
    fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
    SetCommonTensor(input_, dims_, din.data());

    DDim filter_dims(
        std::vector<int64_t>{dims_[1], filter_channels_, ksize_[0], ksize_[1]});
    std::vector<float> dfilter(filter_dims.production());
    fill_data_rand(dfilter.data(), -1.f, 1.f, filter_dims.production());
    SetCommonTensor(filter_, filter_dims, dfilter.data(), {}, true);

    if (!bias_.empty()) {
      DDim bias_dims(std::vector<int64_t>{filter_channels_ * groups_});
      std::vector<float> dbias(bias_dims.production());
      // Fill the bias buffer itself. Filling `din` here would leave the bias
      // all zeros and make the bias test pass without checking anything.
      fill_data_rand(dbias.data(), -1.f, 1.f, bias_dims.production());
      SetCommonTensor(bias_, bias_dims, dbias.data(), {}, true);
    }
  }
};
// Checks conv2d_transpose precision across kernel sizes and filter channel
// counts, with every other attribute left at its default.
void TestConvTransposeKsize(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 11, 12}};
  const std::vector<std::vector<int>> kernel_sizes{
      {1, 1}, {2, 2}, {3, 3}, {2, 3}};
  for (const auto& dims : input_shapes) {
    for (int filter_channels : {1, 3}) {
      for (const auto& ksize : kernel_sizes) {
        std::unique_ptr<arena::TestCase> tester(new ConvTransposeComputeTester(
            place, "def", DDim(dims), filter_channels, ksize));
        arena::Arena runner(std::move(tester), place, abs_error);
        runner.TestPrecision();
      }
    }
  }
}
// Checks conv2d_transpose precision across stride settings, using 3 filter
// channels and a 3x3 kernel.
void TestConvTransposeStrides(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 11, 12}};
  const std::vector<std::vector<int>> stride_cases{{2, 2}, {3, 3}, {1, 2}};
  for (const auto& dims : input_shapes) {
    for (const auto& strides : stride_cases) {
      std::unique_ptr<arena::TestCase> tester(new ConvTransposeComputeTester(
          place, "def", DDim(dims), 3, {3, 3}, strides));
      arena::Arena runner(std::move(tester), place, abs_error);
      runner.TestPrecision();
    }
  }
}
// Checks conv2d_transpose precision across 2-element and 4-element padding
// lists, using 3 filter channels, a 3x3 kernel and unit strides.
void TestConvTransposePaddings(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 11, 12}};
  const std::vector<std::vector<int>> padding_cases{
      {1, 1}, {2, 2}, {0, 1}, {1, 0, 0, 1}, {1, 2, 0, 1}};
  for (const auto& dims : input_shapes) {
    for (const auto& paddings : padding_cases) {
      std::unique_ptr<arena::TestCase> tester(new ConvTransposeComputeTester(
          place, "def", DDim(dims), 3, {3, 3}, {1, 1}, paddings));
      arena::Arena runner(std::move(tester), place, abs_error);
      runner.TestPrecision();
    }
  }
}
// Checks conv2d_transpose precision across group counts, using 12 filter
// channels, a 3x3 kernel, unit strides and zero padding.
void TestConvTransposeGroups(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 11, 12}};
  for (const auto& dims : input_shapes) {
    for (int groups : {2, 3, 6}) {
      std::unique_ptr<arena::TestCase> tester(new ConvTransposeComputeTester(
          place, "def", DDim(dims), 12, {3, 3}, {1, 1}, {0, 0}, groups));
      arena::Arena runner(std::move(tester), place, abs_error);
      runner.TestPrecision();
    }
  }
}
// Checks conv2d_transpose precision across dilation settings, using 3 filter
// channels, a 3x3 kernel, unit strides, zero padding and a single group.
void TestConvTransposeDilations(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 11, 12}};
  const std::vector<std::vector<int>> dilation_cases{{2, 2}, {1, 2}};
  for (const auto& dims : input_shapes) {
    for (const auto& dilations : dilation_cases) {
      std::unique_ptr<arena::TestCase> tester(new ConvTransposeComputeTester(
          place, "def", DDim(dims), 3, {3, 3}, {1, 1}, {0, 0}, 1, dilations));
      arena::Arena runner(std::move(tester), place, abs_error);
      runner.TestPrecision();
    }
  }
}
// Checks conv2d_transpose precision with the "SAME" and "VALID" padding
// algorithms, using 3 filter channels, a 3x3 kernel and 2x2 strides.
void TestConvTransposePaddingAlgorithm(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 11, 12}};
  const std::vector<std::string> algorithms{"SAME", "VALID"};
  for (const auto& dims : input_shapes) {
    for (const auto& padding_algorithm : algorithms) {
      std::unique_ptr<arena::TestCase> tester(
          new ConvTransposeComputeTester(place,
                                         "def",
                                         DDim(dims),
                                         3,
                                         {3, 3},
                                         {2, 2},
                                         {0, 0},
                                         1,
                                         {1, 1},
                                         padding_algorithm));
      arena::Arena runner(std::move(tester), place, abs_error);
      runner.TestPrecision();
    }
  }
}
// Checks conv2d_transpose precision when an explicit output_size attribute
// is given, using a 12x12 input, a 3x3 kernel and 2x2 strides.
void TestConvTransposeOutputSize(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 12, 12}};
  const std::vector<std::vector<int>> output_size_cases{{25, 26}, {26, 26}};
  for (const auto& dims : input_shapes) {
    for (const auto& output_size : output_size_cases) {
      std::unique_ptr<arena::TestCase> tester(
          new ConvTransposeComputeTester(place,
                                         "def",
                                         DDim(dims),
                                         3,
                                         {3, 3},
                                         {2, 2},
                                         {0, 0},
                                         1,
                                         {1, 1},
                                         "",
                                         output_size));
      arena::Arena runner(std::move(tester), place, abs_error);
      runner.TestPrecision();
    }
  }
}
// Checks conv2d_transpose precision with and without a bias input and fused
// ReLU. The combination "no bias + fused ReLU" is skipped.
void TestConvTransposeBiasRelu(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{5, 6, 11, 12}};
  const std::vector<std::string> bias_names{"", "bias"};
  for (const auto& dims : input_shapes) {
    for (const auto& bias : bias_names) {
      for (bool fuse_relu : {true, false}) {
        const bool skipped = bias.empty() && fuse_relu;
        if (!skipped) {
          std::unique_ptr<arena::TestCase> tester(
              new ConvTransposeComputeTester(place,
                                             "def",
                                             DDim(dims),
                                             3,
                                             {3, 3},
                                             {1, 1},
                                             {0, 0},
                                             1,
                                             {1, 1},
                                             "",
                                             {},
                                             bias,
                                             fuse_relu));
          arena::Arena runner(std::move(tester), place, abs_error);
          runner.TestPrecision();
        }
      }
    }
  }
}
// Runs every conv2d_transpose precision sweep on the NPU target. When
// LITE_WITH_NPU is not defined the test body returns without running any
// sweep.
TEST(Conv_transpose, precision) {
#if !defined(LITE_WITH_NPU)
  return;
#else
  Place place;
  place = TARGET(kNPU);
  // Using fp16 in NPU, hence the loose tolerance.
  float abs_error = 5e-2;

  TestConvTransposeKsize(place, abs_error);
  TestConvTransposeStrides(place, abs_error);
  TestConvTransposePaddings(place, abs_error);
  TestConvTransposeGroups(place, abs_error);
  TestConvTransposeDilations(place, abs_error);
  TestConvTransposePaddingAlgorithm(place, abs_error);
  TestConvTransposeOutputSize(place, abs_error);
  TestConvTransposeBiasRelu(place, abs_error);
#endif
}
}
// namespace lite
}
// namespace paddle
lite/tests/utils/naive_math_impl.h
浏览文件 @
b14e21c3
...
...
@@ -407,7 +407,6 @@ void deconv_basic(const Dtype1* din,
int
k
=
chin
/
group
;
int
group_size_in
=
win
*
hin
*
chin
/
group
;
int
group_size_out
=
wout
*
hout
*
chout
/
group
;
int
group_size_coldata
=
m
*
n
;
int
group_size_weights
=
chin
*
chout
*
kernel_w
*
kernel_h
/
(
group
*
group
);
bool
flag_1x1s1p1
=
(
kernel_w
==
1
)
&&
(
kernel_h
==
1
)
&&
(
stride_h
==
1
)
&&
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录