Unverified commit 1bc47c84, authored by Yao Zihang, committed by GitHub

Optimize batchnorm1d using 2D kernel (#43530)

Parent a2c4c86b
@@ -591,10 +591,12 @@ void BatchNormGradRawKernel(const Context &ctx,
     //     ctx.GetPlace()),
     //     epsilon, saved_mean_data, saved_var_data));
 #else
-    // CUDNN PER_ACTIVATION mode only support small batch size
+    // CUDNN only supports small batch sizes
     const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 131070;
+    const size_t CUDNN_SPATIAL_THRESHOLD = 880801;
     const bool use_native_kernel =
-        (x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD);
+        ((x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD) ||
+         (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD));
     if (use_native_kernel) {
       if (compute_format == DataLayout::kNCHW) {
         BNBackward<T, block, DataLayout::kNCHW>
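The hunk above widens the condition for bypassing cuDNN: PER_ACTIVATION mode is used for 2-D [N, C] inputs only while N stays below 131070, and the commit adds the analogous SPATIAL-mode cap of 880801 for 3-D [N, C, L] inputs; past either threshold the hand-written CUDA kernel takes over. A minimal sketch of that dispatch rule in Python follows; the use_native_kernel helper is illustrative, not Paddle API:

# Sketch of the dispatch rule encoded by the C++ hunk above (illustrative).
CUDNN_PER_ACTIVATION_THRESHOLD = 131070  # cap for 2-D [N, C] inputs
CUDNN_SPATIAL_THRESHOLD = 880801         # cap for 3-D [N, C, L] inputs

def use_native_kernel(x_dims):
    # True when the hand-written CUDA kernel should replace the cuDNN path.
    n = x_dims[0]  # batch size; corresponds to N in the kernel
    return ((len(x_dims) == 2 and n >= CUDNN_PER_ACTIVATION_THRESHOLD) or
            (len(x_dims) == 3 and n >= CUDNN_SPATIAL_THRESHOLD))

assert use_native_kernel([200000, 4])        # large [N, C] batch -> native
assert use_native_kernel([1000000, 4, 4])    # large [N, C, L] batch -> native
assert not use_native_kernel([4, 10, 4, 4])  # 4-D input -> cuDNN path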
@@ -82,50 +82,58 @@ class TestBatchNorm(unittest.TestCase):
         self.assertRaises(ValueError, error2d_dataformat)
         self.assertRaises(ValueError, error3d_dataformat)
 
-    def test_eager_api(self):
-        places = [fluid.CPUPlace()]
-        if core.is_compiled_with_cuda():
-            places.append(fluid.CUDAPlace(0))
-        for p in places:
-            shape = [4, 10, 4, 4]
+    def test_large_batch(self):
 
-            def compute_v1(x):
+        def compute_baseline(x):
             with fluid.dygraph.guard(p):
                 bn = fluid.dygraph.BatchNorm(shape[1])
                 #bn = paddle.nn.BatchNorm2D(shape[1])
                 x1 = paddle.to_tensor(x)
                 x1.stop_gradient = False
                 y = bn(x1)
                 y.backward()
                 return y.numpy(), x1.gradient()
 
-            def compute_v2(x):
+        def compute_1d(x):
             with fluid.dygraph.guard(p):
                 with _test_eager_guard():
-                    print("v2")
-                    bn = paddle.nn.BatchNorm2D(shape[1])
+                    bn = paddle.nn.BatchNorm1D(shape[1])
                     x1 = paddle.to_tensor(x)
                     x1.stop_gradient = False
                     y = bn(x1)
                     y.backward()
                     return y.numpy(), x1.gradient()
 
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            # [N, C]
+            shape = [200000, 4]
+            x = np.random.randn(*shape).astype("float32")
+            y1, g1 = compute_baseline(x)
+            y2, g2 = compute_1d(x)
+            self.assertTrue(np.allclose(g1, g2))
+            self.assertTrue(np.allclose(y1, y2))
+            # [N, C, L]
+            shape = [1000000, 4, 4]
             x = np.random.randn(*shape).astype("float32")
-            y1, g1 = compute_v1(x)
-            y2, g2 = compute_v2(x)
+            y1, g1 = compute_baseline(x)
+            y2, g2 = compute_1d(x)
             self.assertTrue(np.allclose(g1, g2))
             self.assertTrue(np.allclose(y1, y2))
-    def test_eager_api_1d(self):
+    def test_eager_api(self):
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
         for p in places:
-            shape = [200000, 4]
+            shape = [4, 10, 4, 4]
 
             def compute_v1(x):
                 with fluid.dygraph.guard(p):
                     bn = fluid.dygraph.BatchNorm(shape[1])
                     #bn = paddle.nn.BatchNorm2D(shape[1])
                     x1 = paddle.to_tensor(x)
                     x1.stop_gradient = False
                     y = bn(x1)
@@ -135,7 +143,8 @@ class TestBatchNorm(unittest.TestCase):
             def compute_v2(x):
                 with fluid.dygraph.guard(p):
                     with _test_eager_guard():
-                        bn = paddle.nn.BatchNorm1D(shape[1])
+                        print("v2")
+                        bn = paddle.nn.BatchNorm2D(shape[1])
                     x1 = paddle.to_tensor(x)
                     x1.stop_gradient = False
                     y = bn(x1)
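The renamed test_large_batch above pushes both shapes past the cuDNN thresholds and checks the native kernel against the legacy fluid layer. A condensed, standalone version of the same comparison, assuming a CUDA build of Paddle 2.x (eager mode by default); the run helper is illustrative:

import numpy as np
import paddle
import paddle.fluid as fluid

def run(make_layer, x_np):
    # Forward and backward through a freshly built layer; return output and dx.
    x = paddle.to_tensor(x_np)
    x.stop_gradient = False
    y = make_layer()(x)
    y.backward()
    return y.numpy(), x.gradient()

x_np = np.random.randn(200000, 4).astype("float32")  # [N, C] with N past 131070
y1, g1 = run(lambda: fluid.dygraph.BatchNorm(4), x_np)  # baseline layer
y2, g2 = run(lambda: paddle.nn.BatchNorm1D(4), x_np)    # exercises the new kernel
print(np.allclose(y1, y2), np.allclose(g1, g2))  # the test asserts both are True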