Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
1bc47c84
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1bc47c84
编写于
7月 14, 2022
作者:
Y
Yao Zihang
提交者:
GitHub
7月 14, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Optimize batchnorm1d using 2D kernel (#43530)
上级
a2c4c86b
变更
3
展开全部
隐藏空白更改
内联
并排
Showing
3 changed file
with
549 addition
and
38 deletion
+549
-38
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+4
-2
paddle/phi/kernels/gpu/batch_norm_kernel.cu
paddle/phi/kernels/gpu/batch_norm_kernel.cu
+511
-11
python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py
python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py
+34
-25
未找到文件。
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
浏览文件 @
1bc47c84
...
...
@@ -591,10 +591,12 @@ void BatchNormGradRawKernel(const Context &ctx,
// ctx.GetPlace()),
// epsilon, saved_mean_data, saved_var_data));
#else
// CUDNN
PER_ACTIVATION mode
only support small batch size
// CUDNN only support small batch size
const
size_t
CUDNN_PER_ACTIVATION_THRESHOLD
=
131070
;
const
size_t
CUDNN_SPATIAL_THRESHOLD
=
880801
;
const
bool
use_native_kernel
=
(
x_dims
.
size
()
==
2
&&
N
>=
CUDNN_PER_ACTIVATION_THRESHOLD
);
((
x_dims
.
size
()
==
2
&&
N
>=
CUDNN_PER_ACTIVATION_THRESHOLD
)
||
(
x_dims
.
size
()
==
3
&&
N
>=
CUDNN_SPATIAL_THRESHOLD
));
if
(
use_native_kernel
)
{
if
(
compute_format
==
DataLayout
::
kNCHW
)
{
BNBackward
<
T
,
block
,
DataLayout
::
kNCHW
>
...
...
paddle/phi/kernels/gpu/batch_norm_kernel.cu
浏览文件 @
1bc47c84
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py
浏览文件 @
1bc47c84
...
...
@@ -82,50 +82,58 @@ class TestBatchNorm(unittest.TestCase):
self
.
assertRaises
(
ValueError
,
error2d_dataformat
)
self
.
assertRaises
(
ValueError
,
error3d_dataformat
)
def
test_eager_api
(
self
):
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
fluid
.
CUDAPlace
(
0
))
for
p
in
places
:
shape
=
[
4
,
10
,
4
,
4
]
def
test_large_batch
(
self
):
def
compute_v1
(
x
):
with
fluid
.
dygraph
.
guard
(
p
):
bn
=
fluid
.
dygraph
.
BatchNorm
(
shape
[
1
])
#bn = paddle.nn.BatchNorm2D(shape[1])
def
compute_baseline
(
x
):
with
fluid
.
dygraph
.
guard
(
p
):
bn
=
fluid
.
dygraph
.
BatchNorm
(
shape
[
1
])
x1
=
paddle
.
to_tensor
(
x
)
x1
.
stop_gradient
=
False
y
=
bn
(
x1
)
y
.
backward
()
return
y
.
numpy
(),
x1
.
gradient
()
def
compute_1d
(
x
):
with
fluid
.
dygraph
.
guard
(
p
):
with
_test_eager_guard
():
bn
=
paddle
.
nn
.
BatchNorm1D
(
shape
[
1
])
x1
=
paddle
.
to_tensor
(
x
)
x1
.
stop_gradient
=
False
y
=
bn
(
x1
)
y
.
backward
()
return
y
.
numpy
(),
x1
.
gradient
()
def
compute_v2
(
x
):
with
fluid
.
dygraph
.
guard
(
p
):
with
_test_eager_guard
():
print
(
"v2"
)
bn
=
paddle
.
nn
.
BatchNorm2D
(
shape
[
1
])
x1
=
paddle
.
to_tensor
(
x
)
x1
.
stop_gradient
=
False
y
=
bn
(
x1
)
y
.
backward
()
return
y
.
numpy
(),
x1
.
gradient
()
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
fluid
.
CUDAPlace
(
0
))
for
p
in
places
:
# [N, C]
shape
=
[
200000
,
4
]
x
=
np
.
random
.
randn
(
*
shape
).
astype
(
"float32"
)
y1
,
g1
=
compute_baseline
(
x
)
y2
,
g2
=
compute_1d
(
x
)
self
.
assertTrue
(
np
.
allclose
(
g1
,
g2
))
self
.
assertTrue
(
np
.
allclose
(
y1
,
y2
))
# [N, C, L]
shape
=
[
1000000
,
4
,
4
]
x
=
np
.
random
.
randn
(
*
shape
).
astype
(
"float32"
)
y1
,
g1
=
compute_
v1
(
x
)
y2
,
g2
=
compute_
v2
(
x
)
y1
,
g1
=
compute_
baseline
(
x
)
y2
,
g2
=
compute_
1d
(
x
)
self
.
assertTrue
(
np
.
allclose
(
g1
,
g2
))
self
.
assertTrue
(
np
.
allclose
(
y1
,
y2
))
def
test_eager_api
_1d
(
self
):
def
test_eager_api
(
self
):
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
fluid
.
CUDAPlace
(
0
))
for
p
in
places
:
shape
=
[
200000
,
4
]
shape
=
[
4
,
10
,
4
,
4
]
def
compute_v1
(
x
):
with
fluid
.
dygraph
.
guard
(
p
):
bn
=
fluid
.
dygraph
.
BatchNorm
(
shape
[
1
])
#bn = paddle.nn.BatchNorm2D(shape[1])
x1
=
paddle
.
to_tensor
(
x
)
x1
.
stop_gradient
=
False
y
=
bn
(
x1
)
...
...
@@ -135,7 +143,8 @@ class TestBatchNorm(unittest.TestCase):
def
compute_v2
(
x
):
with
fluid
.
dygraph
.
guard
(
p
):
with
_test_eager_guard
():
bn
=
paddle
.
nn
.
BatchNorm1D
(
shape
[
1
])
print
(
"v2"
)
bn
=
paddle
.
nn
.
BatchNorm2D
(
shape
[
1
])
x1
=
paddle
.
to_tensor
(
x
)
x1
.
stop_gradient
=
False
y
=
bn
(
x1
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录