Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
be2884eb
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
be2884eb
编写于
11月 04, 2021
作者:
Z
zhulei
提交者:
GitHub
11月 04, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] Add bilinear_interpolate_v2 (#36971)
上级
4977eb22
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
666 addition
and
38 deletion
+666
-38
paddle/fluid/operators/interpolate_v2_op_npu.cc
paddle/fluid/operators/interpolate_v2_op_npu.cc
+386
-38
python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
+1
-0
python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py
...uid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py
+279
-0
未找到文件。
paddle/fluid/operators/interpolate_v2_op_npu.cc
浏览文件 @
be2884eb
...
@@ -20,6 +20,369 @@ namespace operators {
...
@@ -20,6 +20,369 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
using
DataLayout
=
framework
::
DataLayout
;
using
DataLayout
=
framework
::
DataLayout
;
using
DDim
=
framework
::
DDim
;
using
fp16
=
paddle
::
platform
::
float16
;
template
<
typename
T
>
struct
InterpolateFunction
{
public:
explicit
InterpolateFunction
(
const
framework
::
ExecutionContext
&
ctx
)
:
ctx
(
ctx
)
{
place
=
ctx
.
GetPlace
();
stream
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
t0
.
mutable_data
<
float
>
({
1
},
place
);
t1
.
mutable_data
<
float
>
({
1
},
place
);
tn
.
mutable_data
<
float
>
({
1
},
place
);
FillNpuTensorWithConstant
<
float
>
(
&
t0
,
static_cast
<
float
>
(
0
));
FillNpuTensorWithConstant
<
float
>
(
&
t1
,
static_cast
<
float
>
(
1
));
}
void
Arange
(
int
n
,
Tensor
*
x
)
{
FillNpuTensorWithConstant
<
float
>
(
&
tn
,
static_cast
<
float
>
(
n
));
const
auto
&
runner
=
NpuOpRunner
(
"Range"
,
{
t0
,
tn
,
t1
},
{
*
x
},
{});
runner
.
Run
(
stream
);
}
void
ReduceSum
(
const
Tensor
*
x
,
Tensor
*
y
,
const
std
::
vector
<
int
>&
dim
,
bool
keep_dims
=
true
)
{
const
auto
&
runner
=
NpuOpRunner
(
"ReduceSumD"
,
{
*
x
},
{
*
y
},
{{
"axes"
,
dim
},
{
"keep_dims"
,
keep_dims
}});
runner
.
Run
(
stream
);
}
void
Add
(
const
Tensor
*
x
,
const
Tensor
*
y
,
Tensor
*
z
)
{
const
auto
&
runner
=
NpuOpRunner
(
"AddV2"
,
{
*
x
,
*
y
},
{
*
z
},
{});
runner
.
Run
(
stream
);
}
void
Adds
(
const
Tensor
*
x
,
float
scalar
,
Tensor
*
y
)
{
const
auto
&
runner
=
NpuOpRunner
(
"Adds"
,
{
*
x
},
{
*
y
},
{{
"value"
,
scalar
}});
runner
.
Run
(
stream
);
}
void
Mul
(
const
Tensor
*
x
,
const
Tensor
*
y
,
Tensor
*
z
)
{
const
auto
&
runner
=
NpuOpRunner
(
"Mul"
,
{
*
x
,
*
y
},
{
*
z
},
{});
runner
.
Run
(
stream
);
}
void
Sub
(
const
Tensor
*
x
,
const
Tensor
*
y
,
Tensor
*
z
)
{
const
auto
&
runner
=
NpuOpRunner
(
"Sub"
,
{
*
x
,
*
y
},
{
*
z
},
{});
runner
.
Run
(
stream
);
}
void
Cast
(
const
Tensor
*
x
,
Tensor
*
y
)
{
auto
dst_dtype
=
ConvertToNpuDtype
(
y
->
type
());
const
auto
&
runner
=
NpuOpRunner
(
"Cast"
,
{
*
x
},
{
*
y
},
{{
"dst_type"
,
static_cast
<
int
>
(
dst_dtype
)}});
runner
.
Run
(
stream
);
}
void
Gather
(
const
Tensor
*
x
,
const
Tensor
*
indices
,
const
int
axis
,
Tensor
*
y
)
{
const
auto
&
runner
=
NpuOpRunner
(
"GatherV2D"
,
{
*
x
,
*
indices
},
{
*
y
},
{{
"axis"
,
axis
}});
runner
.
Run
(
stream
);
}
void
GatherGrad
(
const
Tensor
*
gy
,
const
Tensor
*
indices
,
const
int
axis
,
Tensor
*
gx
)
{
// 1 gy swapaxis: axis & 0
int
len
=
(
gy
->
dims
()).
size
();
std
::
vector
<
int
>
axis_swap
(
len
);
for
(
int
i
=
0
;
i
<
len
;
i
++
)
{
axis_swap
[
i
]
=
i
;
}
axis_swap
[
0
]
=
axis
;
axis_swap
[
axis
]
=
0
;
auto
y_new_shape
=
gy
->
dims
();
auto
yt
=
y_new_shape
[
axis
];
y_new_shape
[
axis
]
=
y_new_shape
[
0
];
y_new_shape
[
0
]
=
yt
;
Tensor
gy_t
;
gy_t
.
mutable_data
<
T
>
(
y_new_shape
,
place
);
Transpose
(
gy
,
&
gy_t
,
axis_swap
);
// 2 scatter
auto
x_new_shape
=
gx
->
dims
();
auto
xt
=
x_new_shape
[
axis
];
x_new_shape
[
axis
]
=
x_new_shape
[
0
];
x_new_shape
[
0
]
=
xt
;
Tensor
gx_zero
,
gx_t
;
gx_zero
.
mutable_data
<
T
>
(
x_new_shape
,
place
);
gx_t
.
mutable_data
<
T
>
(
x_new_shape
,
place
);
FillNpuTensorWithConstant
<
T
>
(
&
gx_zero
,
static_cast
<
T
>
(
0
));
gx_zero
.
Resize
(
x_new_shape
);
Scatter
(
&
gx_zero
,
indices
,
&
gy_t
,
&
gx_t
);
// 3 gx swapaxis: axis, 0
Transpose
(
&
gx_t
,
gx
,
axis_swap
);
}
void
Scatter
(
const
Tensor
*
x
,
const
Tensor
*
index
,
const
Tensor
*
updates
,
Tensor
*
y
)
{
const
auto
&
runner
=
NpuOpRunner
(
"TensorScatterAdd"
,
{
*
x
,
*
index
,
*
updates
},
{
*
y
},
{});
runner
.
Run
(
stream
);
}
void
Transpose
(
const
Tensor
*
x
,
Tensor
*
y
,
const
std
::
vector
<
int
>&
axis
)
{
const
auto
&
runner
=
NpuOpRunner
(
"TransposeD"
,
{
*
x
},
{
*
y
},
{{
"perm"
,
axis
}});
runner
.
Run
(
stream
);
}
void
Muls
(
const
Tensor
*
x
,
float
scalar
,
Tensor
*
y
)
{
const
auto
&
runner
=
NpuOpRunner
(
"Muls"
,
{
*
x
},
{
*
y
},
{{
"value"
,
scalar
}});
runner
.
Run
(
stream
);
}
void
Maximum
(
const
Tensor
*
x
,
const
Tensor
*
y
,
Tensor
*
z
)
{
const
auto
&
runner
=
NpuOpRunner
(
"Maximum"
,
{
*
x
,
*
y
},
{
*
z
},
{});
runner
.
Run
(
stream
);
}
void
Minimum
(
const
Tensor
*
x
,
const
Tensor
*
y
,
Tensor
*
z
)
{
const
auto
&
runner
=
NpuOpRunner
(
"Minimum"
,
{
*
x
,
*
y
},
{
*
z
},
{});
runner
.
Run
(
stream
);
}
void
Floor
(
const
Tensor
*
x
,
Tensor
*
y
)
{
const
auto
&
runner
=
NpuOpRunner
(
"Floor"
,
{
*
x
},
{
*
y
},
{});
runner
.
Run
(
stream
);
}
private:
platform
::
Place
place
;
aclrtStream
stream
;
const
framework
::
ExecutionContext
&
ctx
;
Tensor
t0
;
Tensor
t1
;
Tensor
tn
;
};
// float16 specialization of Arange: "Range" is run into a float32 scratch
// tensor with the same dims as *x, and the result is then cast into *x.
template <>
void InterpolateFunction<fp16>::Arange(int n, Tensor* x) {
  Tensor range_fp32(framework::proto::VarType::FP32);
  range_fp32.mutable_data<float>(x->dims(), place);
  FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
  const NpuOpRunner range_op("Range", {t0, tn, t1}, {range_fp32}, {});
  range_op.Run(stream);
  Cast(&range_fp32, x);
}
// Derives the scalar parameters of a 2-D interpolation from the layout and
// the input/output dims.
//
// Outputs:
//   axis_h / axis_w : 2 / 3 for DataLayout::kNCHW, otherwise 1 / 2.
//   in_h, in_w      : indim at axis_h / axis_w.
//   out_h, out_w    : outdim at axis_h / axis_w.
//   ratio_h/ratio_w : 0 when the output extent is <= 1; otherwise
//                     (in - 1) / (out - 1) if align_corners, else 1 / scale
//                     when scale > 0, else in / out.
//
// `align_mode` is accepted but not read by this function.
void InterpolateParamCompute(const float scale_h, const float scale_w,
                             const bool align_corners, const int align_mode,
                             const DataLayout& data_layout, const DDim& indim,
                             const DDim& outdim, int* axis_h, int* axis_w,
                             int* in_h, int* in_w, int* out_h, int* out_w,
                             float* ratio_h, float* ratio_w) {
  const bool channels_first = (data_layout == DataLayout::kNCHW);
  *axis_h = channels_first ? 2 : 1;
  *axis_w = channels_first ? 3 : 2;

  *out_h = outdim[*axis_h];
  *out_w = outdim[*axis_w];
  *in_h = indim[*axis_h];
  *in_w = indim[*axis_w];

  // Shared ratio rule for both spatial axes.
  const auto ratio_for = [align_corners](int in_len, int out_len,
                                         float scale) -> float {
    if (!(out_len > 1)) {
      return 0.0f;
    }
    if (align_corners) {
      return static_cast<float>(in_len - 1) / (out_len - 1);
    }
    return scale > 0 ? 1 / scale : static_cast<float>(in_len) / out_len;
  };

  *ratio_h = ratio_for(*in_h, *out_h, scale_h);
  *ratio_w = ratio_for(*in_w, *out_w, scale_w);
}
// Builds, on the NPU, the index and weight tensors used by bilinear
// interpolation.
//
// For each output position i along H (and likewise along W):
//   src     = max(i * ratio, 0), or max((i + 0.5) * ratio - 0.5, 0) when
//             align_cond is true
//   h0      = floor(src), cast into *h0
//   h1      = min(h0 + 1, in_h - 1)
//   coef_h1 = src - floor(src)
//   coef_h0 = 1 - coef_h1
//
// Finally the coefficient tensors are resized so they line up with the
// spatial axes of the data layout: for kNCHW the H coefficients become
// {out_h, 1}; otherwise H becomes {out_h, 1, 1} and W becomes {out_w, 1}.
template <typename T>
void BilinearParamTensorCompute(const framework::ExecutionContext& ctx,
                                const DataLayout& data_layout, int in_h,
                                int in_w, int out_h, int out_w, bool align_cond,
                                float ratio_h, float ratio_w, Tensor* h0,
                                Tensor* h1, Tensor* w0, Tensor* w1,
                                Tensor* coef_h0, Tensor* coef_h1,
                                Tensor* coef_w0, Tensor* coef_w1) {
  InterpolateFunction<T> fn(ctx);
  auto place = ctx.GetPlace();

  // Fractional source coordinates for every output row / column.
  Tensor src_h, src_w;
  src_h.mutable_data<T>({out_h}, place);
  src_w.mutable_data<T>({out_w}, place);
  fn.Arange(out_h, &src_h);
  fn.Arange(out_w, &src_w);

  if (align_cond) {
    fn.Adds(&src_h, 0.5f, &src_h);
    fn.Adds(&src_w, 0.5f, &src_w);
  }
  fn.Muls(&src_h, ratio_h, &src_h);
  fn.Muls(&src_w, ratio_w, &src_w);
  if (align_cond) {
    fn.Adds(&src_h, -0.5f, &src_h);
    fn.Adds(&src_w, -0.5f, &src_w);
  }

  // Scalar constants 0 and 1 of type T.
  Tensor zero_t;
  Tensor one_t;
  zero_t.mutable_data<T>({1}, place);
  one_t.mutable_data<T>({1}, place);
  FillNpuTensorWithConstant<T>(&zero_t, static_cast<T>(0));
  FillNpuTensorWithConstant<T>(&one_t, static_cast<T>(1));

  // Clamp the coordinates at zero from below.
  fn.Maximum(&src_h, &zero_t, &src_h);
  fn.Maximum(&src_w, &zero_t, &src_w);

  // Lower neighbour indices: floor, then cast into h0 / w0.
  Tensor src_h_floor, src_w_floor;
  src_h_floor.mutable_data<T>({out_h}, place);
  src_w_floor.mutable_data<T>({out_w}, place);
  fn.Floor(&src_h, &src_h_floor);
  fn.Floor(&src_w, &src_w_floor);
  fn.Cast(&src_h_floor, h0);
  fn.Cast(&src_w_floor, w0);

  // Upper neighbour indices: lower + 1, limited to in - 1.
  Tensor one_int;
  one_int.mutable_data<int>({1}, place);
  FillNpuTensorWithConstant<int>(&one_int, 1);
  fn.Add(h0, &one_int, h1);
  fn.Add(w0, &one_int, w1);

  Tensor last_h, last_w;
  last_h.mutable_data<int>({1}, place);
  last_w.mutable_data<int>({1}, place);
  FillNpuTensorWithConstant<int>(&last_h, in_h - 1);
  FillNpuTensorWithConstant<int>(&last_w, in_w - 1);
  fn.Minimum(h1, &last_h, h1);
  fn.Minimum(w1, &last_w, w1);

  // Weights: the fractional part goes to the upper neighbour, the remainder
  // to the lower neighbour.
  fn.Sub(&src_h, &src_h_floor, coef_h1);
  fn.Sub(&src_w, &src_w_floor, coef_w1);
  fn.Sub(&one_t, coef_h1, coef_h0);
  fn.Sub(&one_t, coef_w1, coef_w0);

  // Add trailing unit dims so the coefficients line up with the layout.
  if (data_layout == DataLayout::kNCHW) {
    coef_h0->Resize({out_h, 1});
    coef_h1->Resize({out_h, 1});
  } else {
    coef_h0->Resize({out_h, 1, 1});
    coef_h1->Resize({out_h, 1, 1});
    coef_w0->Resize({out_w, 1});
    coef_w1->Resize({out_w, 1});
  }
}
// Forward bilinear interpolation on the NPU.
//
// The input is gathered along H with the lower/upper row indices and scaled
// by the H weights. Each of those two results is then gathered along W with
// the lower/upper column indices and scaled by the W weights. The four
// weighted corner tensors are written into one buffer with a leading
// dimension of 4 and reduced over that dimension into `output`.
template <typename T>
void BilinearFwdNpu(const framework::ExecutionContext& ctx, const Tensor* input,
                    Tensor* output, const float scale_h, const float scale_w,
                    const bool align_corners, const int align_mode,
                    const DataLayout& data_layout) {
  InterpolateFunction<T> fn(ctx);
  auto place = ctx.GetPlace();
  auto outdim = output->dims();
  auto indim = input->dims();

  int axis_h, axis_w;
  int out_h, out_w, in_h, in_w;
  float ratio_h, ratio_w;
  InterpolateParamCompute(scale_h, scale_w, align_corners, align_mode,
                          data_layout, indim, outdim, &axis_h, &axis_w, &in_h,
                          &in_w, &out_h, &out_w, &ratio_h, &ratio_w);

  // Neighbour indices (int) and interpolation weights (T).
  Tensor h0, h1, w0, w1;
  h0.mutable_data<int>({out_h}, place);
  h1.mutable_data<int>({out_h}, place);
  w0.mutable_data<int>({out_w}, place);
  w1.mutable_data<int>({out_w}, place);

  Tensor coef_h0, coef_h1, coef_w0, coef_w1;
  coef_h0.mutable_data<T>({out_h}, place);
  coef_h1.mutable_data<T>({out_h}, place);
  coef_w0.mutable_data<T>({out_w}, place);
  coef_w1.mutable_data<T>({out_w}, place);

  const bool half_pixel_shift = !align_corners && align_mode == 0;
  BilinearParamTensorCompute<T>(ctx, data_layout, in_h, in_w, out_h, out_w,
                                half_pixel_shift, ratio_h, ratio_w, &h0, &h1,
                                &w0, &w1, &coef_h0, &coef_h1, &coef_w0,
                                &coef_w1);

  // Interpolate along H: same dims as the input except axis_h == out_h.
  Tensor rows_lo, rows_hi;
  auto rows_dims = indim;
  rows_dims[axis_h] = out_h;
  rows_lo.mutable_data<T>(rows_dims, place);
  rows_hi.mutable_data<T>(rows_dims, place);

  fn.Gather(input, &h0, axis_h, &rows_lo);
  fn.Gather(input, &h1, axis_h, &rows_hi);
  fn.Mul(&rows_lo, &coef_h0, &rows_lo);
  fn.Mul(&rows_hi, &coef_h1, &rows_hi);

  // One buffer holding the four weighted corners along a new leading
  // dimension (the slices are presumably views sharing its storage).
  Tensor corners;
  corners.mutable_data<T>({4, outdim[0], outdim[1], outdim[2], outdim[3]},
                          place);
  Tensor corner_lo_lo = corners.Slice(0, 1);
  Tensor corner_lo_hi = corners.Slice(1, 2);
  Tensor corner_hi_lo = corners.Slice(2, 3);
  Tensor corner_hi_hi = corners.Slice(3, 4);

  // Interpolate along W.
  fn.Gather(&rows_lo, &w0, axis_w, &corner_lo_lo);
  fn.Gather(&rows_lo, &w1, axis_w, &corner_lo_hi);
  fn.Gather(&rows_hi, &w0, axis_w, &corner_hi_lo);
  fn.Gather(&rows_hi, &w1, axis_w, &corner_hi_hi);
  fn.Mul(&corner_lo_lo, &coef_w0, &corner_lo_lo);
  fn.Mul(&corner_lo_hi, &coef_w1, &corner_lo_hi);
  fn.Mul(&corner_hi_lo, &coef_w0, &corner_hi_lo);
  fn.Mul(&corner_hi_hi, &coef_w1, &corner_hi_hi);

  // Sum the four corners over the leading dimension, without keeping it.
  fn.ReduceSum(&corners, output, std::vector<int>{0}, false);
}
// Backward bilinear interpolation on the NPU.
//
// The output gradient is weighted by the two W coefficients and scattered
// back along W with GatherGrad, and the two results are added. That sum is
// then weighted by the two H coefficients and scattered back along H, and
// the two results are added into `gin`.
template <typename T>
void BilinearBwdNpu(const framework::ExecutionContext& ctx, const Tensor* gout,
                    Tensor* gin, const float scale_h, const float scale_w,
                    const bool align_corners, const int align_mode,
                    const DataLayout& data_layout) {
  InterpolateFunction<T> fn(ctx);
  auto place = ctx.GetPlace();
  auto outdim = gout->dims();
  auto indim = gin->dims();

  int axis_h, axis_w;
  int out_h, out_w, in_h, in_w;
  float ratio_h, ratio_w;
  InterpolateParamCompute(scale_h, scale_w, align_corners, align_mode,
                          data_layout, indim, outdim, &axis_h, &axis_w, &in_h,
                          &in_w, &out_h, &out_w, &ratio_h, &ratio_w);

  // Neighbour indices (int) and interpolation weights (T).
  Tensor h0, h1, w0, w1;
  h0.mutable_data<int>({out_h}, place);
  h1.mutable_data<int>({out_h}, place);
  w0.mutable_data<int>({out_w}, place);
  w1.mutable_data<int>({out_w}, place);

  Tensor coef_h0, coef_h1, coef_w0, coef_w1;
  coef_h0.mutable_data<T>({out_h}, place);
  coef_h1.mutable_data<T>({out_h}, place);
  coef_w0.mutable_data<T>({out_w}, place);
  coef_w1.mutable_data<T>({out_w}, place);

  const bool half_pixel_shift = !align_corners && align_mode == 0;
  BilinearParamTensorCompute<T>(ctx, data_layout, in_h, in_w, out_h, out_w,
                                half_pixel_shift, ratio_h, ratio_w, &h0, &h1,
                                &w0, &w1, &coef_h0, &coef_h1, &coef_w0,
                                &coef_w1);

  // Output gradient weighted by the lower / upper W coefficient.
  Tensor gout_w_lo, gout_w_hi;
  gout_w_lo.mutable_data<T>(outdim, place);
  gout_w_hi.mutable_data<T>(outdim, place);
  fn.Mul(gout, &coef_w0, &gout_w_lo);
  fn.Mul(gout, &coef_w1, &gout_w_hi);

  // Undo the W gather: same dims as gin except axis_h == out_h.
  auto rows_dims = indim;
  rows_dims[axis_h] = out_h;
  Tensor grows_lo, grows_hi;
  grows_lo.mutable_data<T>(rows_dims, place);
  grows_hi.mutable_data<T>(rows_dims, place);
  // The index tensors are reshaped to {n, 1} before being handed to
  // GatherGrad.
  w0.Resize({out_w, 1});
  w1.Resize({out_w, 1});
  fn.GatherGrad(&gout_w_lo, &w0, axis_w, &grows_lo);
  fn.GatherGrad(&gout_w_hi, &w1, axis_w, &grows_hi);

  // Combine both W contributions, then weight by the H coefficients. The
  // upper-row product is taken first because the lower-row product
  // overwrites the combined tensor in place.
  fn.Add(&grows_lo, &grows_hi, &grows_lo);
  fn.Mul(&grows_lo, &coef_h1, &grows_hi);
  fn.Mul(&grows_lo, &coef_h0, &grows_lo);

  // Undo the H gather and accumulate into gin.
  Tensor gin_lo, gin_hi;
  gin_lo.mutable_data<T>(indim, place);
  gin_hi.mutable_data<T>(indim, place);
  h0.Resize({out_h, 1});
  h1.Resize({out_h, 1});
  fn.GatherGrad(&grows_lo, &h0, axis_h, &gin_lo);
  fn.GatherGrad(&grows_hi, &h1, axis_h, &gin_hi);

  fn.Add(&gin_lo, &gin_hi, gin);
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
InterpolateV2NPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
InterpolateV2NPUKernel
:
public
framework
::
OpKernel
<
T
>
{
...
@@ -39,19 +402,6 @@ class InterpolateV2NPUKernel : public framework::OpKernel<T> {
...
@@ -39,19 +402,6 @@ class InterpolateV2NPUKernel : public framework::OpKernel<T> {
int
n
,
c
,
in_d
,
in_h
,
in_w
;
int
n
,
c
,
in_d
,
in_h
,
in_w
;
ExtractNCDWH
(
input_dims
,
data_layout
,
&
n
,
&
c
,
&
in_d
,
&
in_h
,
&
in_w
);
ExtractNCDWH
(
input_dims
,
data_layout
,
&
n
,
&
c
,
&
in_d
,
&
in_h
,
&
in_w
);
PADDLE_ENFORCE_EQ
(
input
->
layout
(),
data_layout
,
platform
::
errors
::
InvalidArgument
(
"Interpolate OP's input tensor layout should equal to attr "
"data_layout, but got tensor layout <%s>, attr layout <%s>"
,
framework
::
DataLayoutToString
(
input
->
layout
()),
data_layout_str
));
PADDLE_ENFORCE_EQ
(
output
->
layout
(),
data_layout
,
platform
::
errors
::
InvalidArgument
(
"Interpolate OP's output tensor layout should equal to attr "
"data_layout, but got tensor layout <%s>, attr layout <%s>"
,
framework
::
DataLayoutToString
(
output
->
layout
()),
data_layout_str
));
auto
interp_method
=
ctx
.
Attr
<
std
::
string
>
(
"interp_method"
);
auto
interp_method
=
ctx
.
Attr
<
std
::
string
>
(
"interp_method"
);
bool
align_corners
=
ctx
.
Attr
<
bool
>
(
"align_corners"
);
bool
align_corners
=
ctx
.
Attr
<
bool
>
(
"align_corners"
);
...
@@ -156,17 +506,22 @@ class InterpolateV2NPUKernel : public framework::OpKernel<T> {
...
@@ -156,17 +506,22 @@ class InterpolateV2NPUKernel : public framework::OpKernel<T> {
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
.
stream
();
NpuOpRunner
runner
;
// To-do(qili93): need to support bilinear, try ResizeD
// To-do(qili93): need to support bilinear, try ResizeD
// Add bilinear by zhulei
if
(
"nearest"
==
interp_method
)
{
if
(
"nearest"
==
interp_method
)
{
NpuOpRunner
runner
;
runner
.
SetType
(
"ResizeNearestNeighborV2"
)
runner
.
SetType
(
"ResizeNearestNeighborV2"
)
.
AddInput
(
*
input
)
.
AddInput
(
*
input
)
.
AddInput
(
std
::
vector
<
int32_t
>
{
out_h
,
out_w
})
.
AddInput
(
std
::
vector
<
int32_t
>
{
out_h
,
out_w
})
.
AddOutput
(
*
output
)
.
AddOutput
(
*
output
)
.
AddAttr
(
"align_corners"
,
align_corners
)
.
AddAttr
(
"align_corners"
,
align_corners
)
.
AddAttr
(
"half_pixel_centers"
,
false
);
.
AddAttr
(
"half_pixel_centers"
,
false
);
}
runner
.
Run
(
stream
);
runner
.
Run
(
stream
);
}
else
if
(
"bilinear"
==
interp_method
)
{
int
align_mode
=
ctx
.
Attr
<
int
>
(
"align_mode"
);
BilinearFwdNpu
<
T
>
(
ctx
,
input
,
output
,
scale_h
,
scale_w
,
align_corners
,
align_mode
,
data_layout
);
}
}
}
};
};
...
@@ -184,27 +539,6 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel<T> {
...
@@ -184,27 +539,6 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel<T> {
int
n
,
c
,
in_d
,
in_h
,
in_w
;
int
n
,
c
,
in_d
,
in_h
,
in_w
;
ExtractNCDWH
(
input
->
dims
(),
data_layout
,
&
n
,
&
c
,
&
in_d
,
&
in_h
,
&
in_w
);
ExtractNCDWH
(
input
->
dims
(),
data_layout
,
&
n
,
&
c
,
&
in_d
,
&
in_h
,
&
in_w
);
PADDLE_ENFORCE_EQ
(
input
->
layout
(),
data_layout
,
platform
::
errors
::
InvalidArgument
(
"Interpolate OP's input tensor layout should equal to attr "
"data_layout, but got tensor layout <%s>, attr layout <%s>"
,
framework
::
DataLayoutToString
(
input
->
layout
()),
data_layout_str
));
PADDLE_ENFORCE_EQ
(
output_grad
->
layout
(),
data_layout
,
platform
::
errors
::
InvalidArgument
(
"Interpolate OP's output_grad tensor layout should "
"equal to attr data_layout, but got tensor layout is "
"<%s>, and attr layout is <%s>"
,
framework
::
DataLayoutToString
(
output_grad
->
layout
()),
data_layout_str
));
PADDLE_ENFORCE_EQ
(
input_grad
->
layout
(),
data_layout
,
platform
::
errors
::
InvalidArgument
(
"Interpolate OP's input_grad tensor layout should "
"equal to attr data_layout, but got tensor layout is "
"<%s>, and attr layout is <%s>"
,
framework
::
DataLayoutToString
(
input_grad
->
layout
()),
data_layout_str
));
auto
interp_method
=
ctx
.
Attr
<
std
::
string
>
(
"interp_method"
);
auto
interp_method
=
ctx
.
Attr
<
std
::
string
>
(
"interp_method"
);
bool
align_corners
=
ctx
.
Attr
<
bool
>
(
"align_corners"
);
bool
align_corners
=
ctx
.
Attr
<
bool
>
(
"align_corners"
);
...
@@ -301,17 +635,21 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel<T> {
...
@@ -301,17 +635,21 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel<T> {
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
.
stream
();
NpuOpRunner
runner
;
// To-do(qili93): need to support bilinear, try ResizeGradD
// To-do(qili93): need to support bilinear, try ResizeGradD
if
(
"nearest"
==
interp_method
)
{
if
(
"nearest"
==
interp_method
)
{
NpuOpRunner
runner
;
runner
.
SetType
(
"ResizeNearestNeighborV2Grad"
)
runner
.
SetType
(
"ResizeNearestNeighborV2Grad"
)
.
AddInput
(
*
output_grad
)
.
AddInput
(
*
output_grad
)
.
AddInput
(
std
::
vector
<
int32_t
>
{
in_h
,
in_w
})
.
AddInput
(
std
::
vector
<
int32_t
>
{
in_h
,
in_w
})
.
AddOutput
(
*
input_grad
)
.
AddOutput
(
*
input_grad
)
.
AddAttr
(
"align_corners"
,
align_corners
)
.
AddAttr
(
"align_corners"
,
align_corners
)
.
AddAttr
(
"half_pixel_centers"
,
false
);
.
AddAttr
(
"half_pixel_centers"
,
false
);
}
runner
.
Run
(
stream
);
runner
.
Run
(
stream
);
}
else
if
(
"bilinear"
==
interp_method
)
{
int
align_mode
=
ctx
.
Attr
<
int
>
(
"align_mode"
);
BilinearBwdNpu
<
T
>
(
ctx
,
output_grad
,
input_grad
,
scale_h
,
scale_w
,
align_corners
,
align_mode
,
data_layout
);
}
}
}
};
};
...
@@ -330,3 +668,13 @@ REGISTER_OP_NPU_KERNEL(
...
@@ -330,3 +668,13 @@ REGISTER_OP_NPU_KERNEL(
nearest_interp_v2_grad
,
nearest_interp_v2_grad
,
ops
::
InterpolateV2NPUGradKernel
<
plat
::
NPUDeviceContext
,
float
>
,
ops
::
InterpolateV2NPUGradKernel
<
plat
::
NPUDeviceContext
,
float
>
,
ops
::
InterpolateV2NPUGradKernel
<
plat
::
NPUDeviceContext
,
plat
::
float16
>
);
ops
::
InterpolateV2NPUGradKernel
<
plat
::
NPUDeviceContext
,
plat
::
float16
>
);
// Register the NPU kernels of bilinear_interp_v2 for float and float16.
REGISTER_OP_NPU_KERNEL(
    bilinear_interp_v2,
    ops::InterpolateV2NPUKernel<plat::NPUDeviceContext, float>,
    ops::InterpolateV2NPUKernel<plat::NPUDeviceContext, plat::float16>);

// Register the NPU gradient kernels of bilinear_interp_v2 for float and
// float16.
REGISTER_OP_NPU_KERNEL(
    bilinear_interp_v2_grad,
    ops::InterpolateV2NPUGradKernel<plat::NPUDeviceContext, float>,
    ops::InterpolateV2NPUGradKernel<plat::NPUDeviceContext, plat::float16>);
python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
浏览文件 @
be2884eb
...
@@ -17,6 +17,7 @@ if (WITH_ASCEND_CL)
...
@@ -17,6 +17,7 @@ if (WITH_ASCEND_CL)
# Note: the following test cases have running time more than 120s
# Note: the following test cases have running time more than 120s
set_tests_properties
(
test_nearest_interp_op_npu PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_nearest_interp_op_npu PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_nearest_interp_v2_op_npu PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_nearest_interp_v2_op_npu PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_bilinear_interp_v2_op_npu PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_stack_op_npu PROPERTIES TIMEOUT 300
)
set_tests_properties
(
test_stack_op_npu PROPERTIES TIMEOUT 300
)
set_tests_properties
(
test_conv2d_transpose_op_npu PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_conv2d_transpose_op_npu PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_conv2d_op_npu PROPERTIES TIMEOUT 300
)
set_tests_properties
(
test_conv2d_op_npu PROPERTIES TIMEOUT 300
)
...
...
python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py
0 → 100644
浏览文件 @
be2884eb
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
sys
sys
.
path
.
append
(
".."
)
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
from
paddle.nn.functional
import
interpolate
import
paddle
from
test_bilinear_interp_v2_op
import
bilinear_interp_np
paddle
.
enable_static
()
class TestBilinearInterpOp(OpTest):
    """Base OpTest case for the ``bilinear_interp_v2`` operator on NPU.

    ``setUp`` builds a random input, computes the expected output with
    ``bilinear_interp_np`` and fills ``inputs`` / ``attrs`` / ``outputs``.
    Subclasses change the configuration by overriding ``init_test_case``.
    """

    def set_npu(self):
        """Flag the test class as an NPU test and select NPU device 0."""
        self.__class__.use_npu = True
        self.place = paddle.NPUPlace(0)

    def setUp(self):
        """Prepare operator inputs, attributes and the reference output."""
        self.set_npu()
        self.out_size = None
        self.actual_shape = None
        self.data_layout = 'NCHW'
        self.init_test_case()
        self.op_type = "bilinear_interp_v2"
        input_np = np.random.random(self.input_shape).astype(self.dtype)

        # Positions of the height / width axes in the input shape.
        h_axis, w_axis = (2, 3) if self.data_layout == "NCHW" else (1, 2)
        in_h = self.input_shape[h_axis]
        in_w = self.input_shape[w_axis]

        # A truthy scale determines the output size; otherwise the explicit
        # out_h / out_w of the test case are used.
        scale_h = 0
        scale_w = 0
        if self.scale:
            if isinstance(self.scale, (float, int)):
                if self.scale > 0.:
                    scale_h = scale_w = float(self.scale)
            if isinstance(self.scale, list):
                if len(self.scale) == 1:
                    scale_w = scale_h = self.scale[0]
                elif len(self.scale) > 1:
                    scale_w = self.scale[1]
                    scale_h = self.scale[0]
            out_h = int(in_h * scale_h)
            out_w = int(in_w * scale_w)
        else:
            out_h = self.out_h
            out_w = self.out_w

        output_np = bilinear_interp_np(
            input_np, out_h, out_w, scale_w, scale_h, self.out_size,
            self.actual_shape, self.align_corners, self.align_mode,
            self.data_layout)

        self.inputs = {'X': input_np}
        # Both optional size tensors feed 'OutSize'; actual_shape is applied
        # last and therefore wins when both are set.
        for size_override in (self.out_size, self.actual_shape):
            if size_override is not None:
                self.inputs['OutSize'] = size_override

        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
            'interp_method': self.interp_method,
            'align_corners': self.align_corners,
            'align_mode': self.align_mode,
            'data_layout': self.data_layout
        }
        if self.scale:
            # Normalise the scale attribute to a two-element list.
            if isinstance(self.scale, (float, int)):
                if self.scale > 0.:
                    self.scale = [self.scale]
            if isinstance(self.scale, list) and len(self.scale) == 1:
                self.scale = [self.scale[0], self.scale[0]]
            self.attrs['scale'] = self.scale
        self.outputs = {'Out': output_np}

    def test_check_output(self):
        """Compare the NPU output with the reference within ``self.atol``."""
        self.check_output_with_place(self.place, atol=self.atol)

    def test_check_grad(self):
        """Compare gradients of 'X' computed on CPU and on NPU.

        The comparison is skipped for float16 cases.
        """
        self.__class__.exist_check_grad = True
        if self.dtype == 'float16':
            return

        self.max_relative_error = 0.005
        checked_inputs = ['X']
        checked_outputs = ['Out']
        excluded = set()

        grads_on_cpu = self._get_gradient(checked_inputs,
                                          fluid.CPUPlace(), checked_outputs,
                                          excluded)
        grads_on_npu = self._get_gradient(checked_inputs, self.place,
                                          checked_outputs, excluded)
        self._assert_is_close(grads_on_cpu, grads_on_npu, checked_inputs,
                              self.max_relative_error,
                              "Gradient Check between places")

    def init_test_case(self):
        """Default configuration: float32, scale 1.5, align_mode 1."""
        self.interp_method = 'bilinear'
        self.input_shape = [2, 3, 5, 7]
        self.out_h = 60
        self.out_w = 25
        self.scale = 1.5
        self.align_corners = False
        self.align_mode = 1
        self.dtype = 'float32'
        self.atol = 1e-5
class TestBilinearInterpCaseFP16(TestBilinearInterpOp):
    """Base configuration run in float16 with a looser tolerance."""

    def init_test_case(self):
        super(TestBilinearInterpCaseFP16, self).init_test_case()
        # float16 output is compared with atol=1e-2 instead of 1e-5.
        self.atol = 1e-2
        self.dtype = 'float16'
class TestBilinearInterpCase1(TestBilinearInterpOp):
    """[4, 1, 7, 8] input resized to 1x1 via out_h / out_w (scale is 0)."""

    def init_test_case(self):
        super(TestBilinearInterpCase1, self).init_test_case()
        self.input_shape = [4, 1, 7, 8]
        self.scale = 0.
        self.out_h = 1
        self.out_w = 1
class TestBilinearInterpCase2(TestBilinearInterpOp):
    """[3, 3, 9, 6] input resized to 12x12 via out_h / out_w (scale is 0)."""

    def init_test_case(self):
        super(TestBilinearInterpCase2, self).init_test_case()
        self.input_shape = [3, 3, 9, 6]
        self.scale = 0.
        self.out_h = 12
        self.out_w = 12
class TestBilinearInterpCase3(TestBilinearInterpOp):
    """[1, 1, 32, 64] input resized to 64x32 via out_h / out_w (scale is 0)."""

    def init_test_case(self):
        super(TestBilinearInterpCase3, self).init_test_case()
        self.input_shape = [1, 1, 32, 64]
        self.scale = 0.
        self.out_h = 64
        self.out_w = 32
class TestBilinearInterpCase4(TestBilinearInterpOp):
    """Case1 shape with an additional int32 ``out_size`` of [2, 2]."""

    def init_test_case(self):
        super(TestBilinearInterpCase4, self).init_test_case()
        self.input_shape = [4, 1, 7, 8]
        self.scale = 0.
        self.out_h = 1
        self.out_w = 1
        # Passed to the operator as the 'OutSize' input by setUp.
        self.out_size = np.array([2, 2]).astype("int32")
class TestBilinearInterpCase5(TestBilinearInterpOp):
    """Case2 shape with an additional int32 ``out_size`` of [11, 11]."""

    def init_test_case(self):
        super(TestBilinearInterpCase5, self).init_test_case()
        self.input_shape = [3, 3, 9, 6]
        self.scale = 0.
        self.out_h = 12
        self.out_w = 12
        # Passed to the operator as the 'OutSize' input by setUp.
        self.out_size = np.array([11, 11]).astype("int32")
class TestBilinearInterpCase6(TestBilinearInterpOp):
    """Case3 shape with an additional int32 ``out_size`` of [65, 33]."""

    def init_test_case(self):
        super(TestBilinearInterpCase6, self).init_test_case()
        self.input_shape = [1, 1, 32, 64]
        self.scale = 0.
        self.out_h = 64
        self.out_w = 32
        # Passed to the operator as the 'OutSize' input by setUp.
        self.out_size = np.array([65, 33]).astype("int32")
class TestBilinearInterpCase7(TestBilinearInterpOp):
    """[1, 1, 32, 64] input with a per-axis scale list of [2.0, 0.5]."""

    def init_test_case(self):
        super(TestBilinearInterpCase7, self).init_test_case()
        self.input_shape = [1, 1, 32, 64]
        # Element 0 is used as the height scale, element 1 as the width scale.
        self.scale = [2.0, 0.5]
        self.out_h = 64
        self.out_w = 32
class TestBilinearInterpSame(TestBilinearInterpOp):
    """Output size (32x64) equal to the input's spatial size."""

    def init_test_case(self):
        super(TestBilinearInterpSame, self).init_test_case()
        self.input_shape = [2, 3, 32, 64]
        self.scale = 0.
        self.out_h = 32
        self.out_w = 64
class TestBilinearInterpActualShape(TestBilinearInterpOp):
    """[3, 2, 32, 16] input with out 64x32 and an ``out_size`` of [66, 40]."""

    def init_test_case(self):
        super(TestBilinearInterpActualShape, self).init_test_case()
        self.input_shape = [3, 2, 32, 16]
        self.scale = 0.
        self.out_h = 64
        self.out_w = 32
        # Passed to the operator as the 'OutSize' input by setUp.
        self.out_size = np.array([66, 40]).astype("int32")
class TestBilinearInterpDataLayout(TestBilinearInterpOp):
    """NHWC layout case: [2, 5, 5, 3] input with an ``out_size`` of [3, 3]."""

    def init_test_case(self):
        super(TestBilinearInterpDataLayout, self).init_test_case()
        self.data_layout = "NHWC"
        self.input_shape = [2, 5, 5, 3]
        self.scale = 0.
        self.out_h = 2
        self.out_w = 2
        # Passed to the operator as the 'OutSize' input by setUp.
        self.out_size = np.array([3, 3]).astype("int32")
class TestBilinearInterpOtherMethod1(TestBilinearInterpOp):
    """Base configuration with align_corners=False and align_mode=1."""

    def init_test_case(self):
        # setUp() only invokes init_test_case(), so the alignment override
        # has to be applied from here; on its own set_align_mode() is never
        # called and would have no effect.
        super(TestBilinearInterpOtherMethod1, self).init_test_case()
        self.set_align_mode()

    def set_align_mode(self):
        """Select the alignment settings exercised by this case."""
        self.align_corners = False
        self.align_mode = 1
class TestBilinearInterpWithMethod2(TestBilinearInterpOp):
    """Base configuration with align_corners=False and align_mode=0."""

    def init_test_case(self):
        # setUp() only invokes init_test_case(), so the alignment override
        # has to be applied from here; on its own set_align_mode() is never
        # called and this class would silently re-run the base configuration.
        super(TestBilinearInterpWithMethod2, self).init_test_case()
        self.set_align_mode()

    def set_align_mode(self):
        """Select the alignment settings exercised by this case."""
        self.align_corners = False
        self.align_mode = 0
class TestBilinearInterpWithMethod3(TestBilinearInterpOp):
    """Base configuration with align_corners=True and align_mode=0."""

    def init_test_case(self):
        # setUp() only invokes init_test_case(), so the alignment override
        # has to be applied from here; on its own set_align_mode() is never
        # called and this class would silently re-run the base configuration.
        super(TestBilinearInterpWithMethod3, self).init_test_case()
        self.set_align_mode()

    def set_align_mode(self):
        """Select the alignment settings exercised by this case."""
        self.align_corners = True
        self.align_mode = 0
class TestBilinearInterpScale1(TestBilinearInterpOp):
    """[2, 3, 5, 7] input with a scalar scale of 2."""

    def init_test_case(self):
        super(TestBilinearInterpScale1, self).init_test_case()
        self.input_shape = [2, 3, 5, 7]
        self.scale = 2.
        self.out_h = 60
        self.out_w = 25
class TestBilinearInterpScale2(TestBilinearInterpOp):
    """[2, 3, 5, 7] input with a scalar scale of 1."""

    def init_test_case(self):
        super(TestBilinearInterpScale2, self).init_test_case()
        self.input_shape = [2, 3, 5, 7]
        self.scale = 1.
        self.out_h = 60
        self.out_w = 25
class TestBilinearInterpZero(TestBilinearInterpOp):
    """[2, 3, 5, 7] input with a scalar scale of 0.2 and align_mode=0."""

    def init_test_case(self):
        super(TestBilinearInterpZero, self).init_test_case()
        self.input_shape = [2, 3, 5, 7]
        self.scale = 0.2
        self.align_mode = 0
        self.out_h = 60
        self.out_w = 25
# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录