Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
8489d4f7
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8489d4f7
编写于
1月 18, 2021
作者:
Q
QingshuChen
提交者:
GitHub
1月 18, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimize batch_norm & pool op for kunlun (#30490)
上级
bd971922
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
197 addition
and
91 deletion
+197
-91
paddle/fluid/operators/batch_norm_op_xpu.cc
paddle/fluid/operators/batch_norm_op_xpu.cc
+8
-10
paddle/fluid/operators/pool_op_xpu.cc
paddle/fluid/operators/pool_op_xpu.cc
+38
-56
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+1
-10
python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py
...on/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py
+150
-15
未找到文件。
paddle/fluid/operators/batch_norm_op_xpu.cc
浏览文件 @
8489d4f7
...
...
@@ -139,16 +139,14 @@ class BatchNormGradXPUKernel : public framework::OpKernel<T> {
auto
*
dscale_data
=
dscale
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
dbias_data
=
dbias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
batch_norm_backward
(
dev_ctx
.
x_context
(),
N
,
C
,
H
,
W
,
x_data
,
dy_data
,
scale_data
,
saved_mean_data
,
saved_inv_variance_data
,
dx_data
,
dscale_data
,
dbias_data
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API(batch_norm_infer_forward) return "
"wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
int
r
=
xpu
::
batch_norm_grad
<
T
>
(
dev_ctx
.
x_context
(),
x_data
,
dy_data
,
dx_data
,
N
,
C
,
H
,
W
,
scale_data
,
saved_mean_data
,
saved_inv_variance_data
,
dscale_data
,
dbias_data
,
true
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API(batch_norm_grad) return "
"wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
};
...
...
paddle/fluid/operators/pool_op_xpu.cc
浏览文件 @
8489d4f7
...
...
@@ -30,6 +30,7 @@ xpu::Pooling_t XPUPoolingType(const std::string& pooltype, bool exclusive,
"Pool op only supports 2D and 3D input."
));
}
}
template
<
typename
DeviceContext
,
typename
T
>
class
PoolXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -41,7 +42,6 @@ class PoolXPUKernel : public framework::OpKernel<T> {
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
bool
exclusive
=
context
.
Attr
<
bool
>
(
"exclusive"
);
bool
is_test
=
context
.
Attr
<
bool
>
(
"is_test"
);
bool
adaptive
=
context
.
Attr
<
bool
>
(
"adaptive"
);
PADDLE_ENFORCE_EQ
(
ksize
.
size
(),
2
,
...
...
@@ -60,36 +60,32 @@ class PoolXPUKernel : public framework::OpKernel<T> {
ksize
[
i
]
=
static_cast
<
int
>
(
in_x
->
dims
()[
i
+
2
]);
}
}
const
int
c
=
in_x
->
dims
()[
0
]
*
in_x
->
dims
()[
1
];
const
int
n
=
in_x
->
dims
()[
0
];
const
int
c
=
in_x
->
dims
()[
1
];
const
int
in_h
=
in_x
->
dims
()[
2
];
const
int
in_w
=
in_x
->
dims
()[
3
];
const
int
out_h
=
out
->
dims
()[
2
];
const
int
out_w
=
out
->
dims
()[
3
];
const
int
win_h
=
ksize
[
0
];
const
int
win_w
=
ksize
[
1
];
const
int
stride_h
=
strides
[
0
];
const
int
stride_w
=
strides
[
1
];
const
int
pad_up
=
paddings
[
0
];
const
int
pad_down
=
paddings
[
0
];
const
int
pad_left
=
paddings
[
1
];
const
int
pad_right
=
paddings
[
1
];
const
float
*
input
=
in_x
->
data
<
float
>
();
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
float
*
output
=
out
->
data
<
float
>
();
xpu
::
Pooling_t
pool_type
=
XPUPoolingType
(
pooling_type
,
exclusive
,
is_test
);
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
pooling_forward
<
float
,
float
>
(
dev_ctx
.
x_context
(),
input
,
output
,
index_data
,
pool_type
,
c
,
in_h
,
in_w
,
pad_left
,
pad_right
,
pad_up
,
pad_down
,
win_h
,
win_w
,
stride_h
,
stride_w
,
out_h
,
out_w
);
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"The pool2d XPU API return wrong value[%d], please check "
"where Baidu Kunlun Card is properly installed."
,
r
));
int
r
=
xpu
::
Error_t
::
SUCCESS
;
if
(
pooling_type
==
"max"
)
{
r
=
xpu
::
max_pool2d
(
dev_ctx
.
x_context
(),
input
,
output
,
index_data
,
n
,
c
,
in_h
,
in_w
,
ksize
,
strides
,
paddings
,
true
);
}
else
if
(
pooling_type
==
"avg"
)
{
r
=
xpu
::
avg_pool2d
(
dev_ctx
.
x_context
(),
input
,
output
,
n
,
c
,
in_h
,
in_w
,
ksize
,
strides
,
paddings
,
!
exclusive
,
true
);
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Unsupported pooling type for kunlun "
,
pooling_type
));
}
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"The pool2d XPU API return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
PoolGradXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -126,47 +122,33 @@ class PoolGradXPUKernel : public framework::OpKernel<T> {
if
(
!
in_x_grad
)
{
return
;
}
const
int
c
=
in_x
->
dims
()[
0
]
*
in_x
->
dims
()[
1
];
const
int
n
=
in_x
->
dims
()[
0
];
const
int
c
=
in_x
->
dims
()[
1
];
const
int
in_h
=
in_x
->
dims
()[
2
];
const
int
in_w
=
in_x
->
dims
()[
3
];
const
int
out_h
=
out
->
dims
()[
2
];
const
int
out_w
=
out
->
dims
()[
3
];
const
int
win_h
=
ksize
[
0
];
const
int
win_w
=
ksize
[
1
];
const
int
stride_h
=
strides
[
0
];
const
int
stride_w
=
strides
[
1
];
const
int
pad_up
=
paddings
[
0
];
const
int
pad_down
=
paddings
[
0
];
const
int
pad_left
=
paddings
[
1
];
const
int
pad_right
=
paddings
[
1
];
const
float
*
input
=
in_x
->
data
<
float
>
();
const
float
*
output
=
out
->
data
<
float
>
();
const
float
*
output_grad
=
out_grad
->
data
<
float
>
();
in_x_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
float
*
input_grad
=
in_x_grad
->
data
<
float
>
();
xpu
::
Pooling_t
pool_type
=
XPUPoolingType
(
pooling_type
,
exclusive
,
false
);
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
// Need to init memory in the first place
const
int
zero
=
0
;
int
r
=
xpu
::
memset
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
void
**>
(
input_grad
),
zero
,
in_x_grad
->
numel
()
*
sizeof
(
float
));
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"The Pool2d XPU OP return wrong value[%d], please check "
"where Baidu Kunlun Card is properly installed."
,
r
));
r
=
xpu
::
pooling_backward
(
dev_ctx
.
x_context
(),
input
,
output
,
index_data
,
output_grad
,
input_grad
,
pool_type
,
c
,
in_h
,
in_w
,
pad_left
,
pad_right
,
pad_up
,
pad_down
,
win_h
,
win_w
,
stride_h
,
stride_w
,
out_h
,
out_w
);
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"The Pool2d XPU OP return wrong value[%d], please check "
"where Baidu Kunlun Card is properly installed."
,
r
));
int
r
=
xpu
::
Error_t
::
SUCCESS
;
if
(
pooling_type
==
"max"
)
{
r
=
xpu
::
max_pool2d_grad
(
dev_ctx
.
x_context
(),
input
,
output
,
index_data
,
output_grad
,
input_grad
,
n
,
c
,
in_h
,
in_w
,
ksize
,
strides
,
paddings
,
true
);
}
else
if
(
pooling_type
==
"avg"
)
{
r
=
xpu
::
avg_pool2d_grad
(
dev_ctx
.
x_context
(),
input
,
output
,
output_grad
,
input_grad
,
n
,
c
,
in_h
,
in_w
,
ksize
,
strides
,
paddings
,
!
exclusive
,
true
);
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Unsupported pooling type for kunlun "
,
pooling_type
));
}
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"The Pool2dGrad XPU OP return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
};
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
8489d4f7
...
...
@@ -172,16 +172,7 @@ Place CPUDeviceContext::GetPlace() const { return place_; }
#ifdef PADDLE_WITH_XPU
XPUDeviceContext
::
XPUDeviceContext
()
{
context_
=
xpu
::
create_context
();
}
XPUDeviceContext
::~
XPUDeviceContext
()
{
xpu
::
destroy_context
(
context_
);
void
*
l3ptr
=
nullptr
;
int
l3_size
=
13.5
*
1024
*
1024
;
xpu_malloc
(
static_cast
<
void
**>
(
&
l3ptr
),
l3_size
,
XPU_MEM_L3
);
if
(
l3ptr
!=
nullptr
)
{
context_
->
_l3_mgr
.
set
(
l3ptr
,
l3_size
);
std
::
cout
<<
"set l3 size "
<<
l3_size
<<
std
::
endl
;
}
}
XPUDeviceContext
::~
XPUDeviceContext
()
{}
XPUDeviceContext
::
XPUDeviceContext
(
XPUPlace
place
)
:
place_
(
place
)
{
int
dev_id
=
-
1
;
...
...
python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py
浏览文件 @
8489d4f7
# Copyright (c) 20
20
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 20
18
PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -13,16 +13,20 @@
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
division
import
sys
sys
.
path
.
append
(
".."
)
import
paddle.fluid.core
as
core
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
import
paddle
import
paddle.fluid.core
as
core
from
op_test_xpu
import
XPUOpTest
import
paddle.fluid
as
fluid
from
paddle.fluid
import
Program
,
program_guard
import
paddle
paddle
.
enable_static
()
def
max_pool2D_forward_naive
(
x
,
...
...
@@ -241,7 +245,7 @@ def pool2D_forward_naive(x,
return
out
class
TestPool2D_Op
(
OpTest
):
class
TestPool2D_Op
(
XPU
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"pool2d"
self
.
use_cudnn
=
False
...
...
@@ -265,7 +269,7 @@ class TestPool2D_Op(OpTest):
input
,
self
.
ksize
,
self
.
strides
,
self
.
paddings
,
self
.
global_pool
,
self
.
ceil_mode
,
self
.
exclusive
,
self
.
adaptive
,
self
.
data_format
,
self
.
pool_type
,
self
.
padding_algorithm
).
astype
(
self
.
dtype
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
input
)}
self
.
inputs
=
{
'X'
:
XPU
OpTest
.
np_dtype_to_fluid_dtype
(
input
)}
self
.
attrs
=
{
'strides'
:
self
.
strides
,
...
...
@@ -284,18 +288,20 @@ class TestPool2D_Op(OpTest):
self
.
outputs
=
{
'Out'
:
output
}
def
has_xpu
(
self
):
return
core
.
is_compiled_with_xpu
()
def
test_check_output
(
self
):
if
paddle
.
is_compiled_with_xpu
():
paddle
.
enable_static
()
place
=
paddle
.
XPUPlace
(
0
)
if
self
.
has_xpu
():
place
=
core
.
XPUPlace
(
0
)
self
.
check_output_with_place
(
place
)
return
def
test_check_grad
(
self
):
if
paddle
.
is_compiled_with_xpu
():
paddle
.
enable_static
()
place
=
paddle
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'X'
]),
'Out'
,
max_relative_error
=
0.07
)
if
self
.
has_xpu
():
place
=
core
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'X'
]),
'Out'
)
return
def
init_data_format
(
self
):
self
.
data_format
=
"NCHW"
...
...
@@ -315,7 +321,7 @@ class TestPool2D_Op(OpTest):
self
.
use_cudnn
=
False
def
init_data_type
(
self
):
self
.
dtype
=
np
.
float
64
self
.
dtype
=
np
.
float
32
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
...
...
@@ -334,5 +340,134 @@ class TestPool2D_Op(OpTest):
self
.
adaptive
=
False
class
TestCase1
(
TestPool2D_Op
):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
def
init_paddings
(
self
):
self
.
paddings
=
[
0
,
0
]
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
False
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
]
class
TestCase2
(
TestPool2D_Op
):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
def
init_paddings
(
self
):
self
.
paddings
=
[
1
,
1
]
def
init_pool_type
(
self
):
self
.
pool_type
=
"avg"
self
.
pool2D_forward_naive
=
avg_pool2D_forward_naive
def
init_global_pool
(
self
):
self
.
global_pool
=
False
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
]
class
TestCase3
(
TestPool2D_Op
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
class
TestCase4
(
TestCase1
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
class
TestCase5
(
TestCase2
):
def
init_pool_type
(
self
):
self
.
pool_type
=
"max"
self
.
pool2D_forward_naive
=
max_pool2D_forward_naive
class
TestPool2D_AsyPadding
(
TestPool2D_Op
):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
0
,
1
,
2
]
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
5
,
5
]
class
TestCase1_AsyPadding
(
TestCase1
):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
0
,
1
,
0
]
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
]
class
TestCase2_AsyPadding
(
TestCase2
):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
2
,
1
,
2
]
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
]
class
TestCase3_AsyPadding
(
TestCase3
):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
0
,
1
,
2
]
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
5
,
5
]
class
TestCase4_AsyPadding
(
TestCase4
):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
0
,
1
,
0
]
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
]
class
TestCase5_AsyPadding
((
TestCase5
)):
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
2
,
2
,
1
,
2
]
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
]
class
TestAvgInclude_AsyPadding
(
TestCase2
):
def
init_exclusive
(
self
):
self
.
exclusive
=
False
def
init_test_case
(
self
):
self
.
ksize
=
[
3
,
3
]
self
.
strides
=
[
1
,
1
]
self
.
paddings
=
[
1
,
2
,
1
,
2
]
def
init_shape
(
self
):
self
.
shape
=
[
2
,
3
,
7
,
7
]
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录