Unverified · Commit 1bc47c84 · Authored by: Yao Zihang · Committed by: GitHub

Optimize batchnorm1d using 2D kernel (#43530)

Parent: a2c4c86b
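
This commit extends the large-batch fallback in `BatchNormGradRawKernel`: in addition to the existing 2-D ([N, C]) case, 3-D ([N, C, L]) inputs whose batch dimension exceeds a cuDNN limit are now routed to Paddle's native `BNBackward` kernel (the constant names suggest cuDNN's PER_ACTIVATION and SPATIAL modes top out near N = 131070 and N = 880801, respectively). Below is a minimal sketch of the user-visible path, with shapes borrowed from the new `test_large_batch` unit test; the `gpu` device assumes a CUDA build of Paddle:

import numpy as np
import paddle

# Assumes a CUDA build of Paddle; the cuDNN thresholds only matter on GPU.
paddle.set_device("gpu")

# Shapes taken from test_large_batch: [N, C] and [N, C, L] inputs whose
# batch dimension exceeds the thresholds added in this commit.
for shape in ([200000, 4], [1000000, 4, 4]):
    x = paddle.to_tensor(np.random.randn(*shape).astype("float32"))
    x.stop_gradient = False
    bn = paddle.nn.BatchNorm1D(shape[1])
    y = bn(x)
    y.backward()  # batch_norm_grad now dispatches to the native kernel
    print(shape, y.numpy().shape, x.gradient().shape)

On these shapes the outputs and input gradients should match the `fluid.dygraph.BatchNorm` baseline, which is exactly what the updated test asserts.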
@@ -591,10 +591,12 @@ void BatchNormGradRawKernel(const Context &ctx,
     //         ctx.GetPlace()),
     //     epsilon, saved_mean_data, saved_var_data));
 #else
-    // CUDNN PER_ACTIVATION mode only support small batch size
+    // CUDNN only support small batch size
     const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 131070;
+    const size_t CUDNN_SPATIAL_THRESHOLD = 880801;
     const bool use_native_kernel =
-        (x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD);
+        ((x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD) ||
+         (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD));
     if (use_native_kernel) {
       if (compute_format == DataLayout::kNCHW) {
         BNBackward<T, block, DataLayout::kNCHW>
...
@@ -82,50 +82,58 @@ class TestBatchNorm(unittest.TestCase):
         self.assertRaises(ValueError, error2d_dataformat)
         self.assertRaises(ValueError, error3d_dataformat)

-    def test_eager_api(self):
-        places = [fluid.CPUPlace()]
-        if core.is_compiled_with_cuda():
-            places.append(fluid.CUDAPlace(0))
-        for p in places:
-            shape = [4, 10, 4, 4]
-
-            def compute_v1(x):
-                with fluid.dygraph.guard(p):
-                    bn = fluid.dygraph.BatchNorm(shape[1])
-                    #bn = paddle.nn.BatchNorm2D(shape[1])
-                    x1 = paddle.to_tensor(x)
-                    x1.stop_gradient = False
-                    y = bn(x1)
-                    y.backward()
-                    return y.numpy(), x1.gradient()
-
-            def compute_v2(x):
-                with fluid.dygraph.guard(p):
-                    with _test_eager_guard():
-                        print("v2")
-                        bn = paddle.nn.BatchNorm2D(shape[1])
-                        x1 = paddle.to_tensor(x)
-                        x1.stop_gradient = False
-                        y = bn(x1)
-                        y.backward()
-                        return y.numpy(), x1.gradient()
-
-            x = np.random.randn(*shape).astype("float32")
-            y1, g1 = compute_v1(x)
-            y2, g2 = compute_v2(x)
-            self.assertTrue(np.allclose(g1, g2))
-            self.assertTrue(np.allclose(y1, y2))
-
-    def test_eager_api_1d(self):
+    def test_large_batch(self):
+
+        def compute_baseline(x):
+            with fluid.dygraph.guard(p):
+                bn = fluid.dygraph.BatchNorm(shape[1])
+                x1 = paddle.to_tensor(x)
+                x1.stop_gradient = False
+                y = bn(x1)
+                y.backward()
+                return y.numpy(), x1.gradient()
+
+        def compute_1d(x):
+            with fluid.dygraph.guard(p):
+                with _test_eager_guard():
+                    bn = paddle.nn.BatchNorm1D(shape[1])
+                    x1 = paddle.to_tensor(x)
+                    x1.stop_gradient = False
+                    y = bn(x1)
+                    y.backward()
+                    return y.numpy(), x1.gradient()
+
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            # [N, C]
+            shape = [200000, 4]
+            x = np.random.randn(*shape).astype("float32")
+            y1, g1 = compute_baseline(x)
+            y2, g2 = compute_1d(x)
+            self.assertTrue(np.allclose(g1, g2))
+            self.assertTrue(np.allclose(y1, y2))
+
+            # [N, C, L]
+            shape = [1000000, 4, 4]
+            x = np.random.randn(*shape).astype("float32")
+            y1, g1 = compute_baseline(x)
+            y2, g2 = compute_1d(x)
+            self.assertTrue(np.allclose(g1, g2))
+            self.assertTrue(np.allclose(y1, y2))
+
+    def test_eager_api(self):
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
         for p in places:
-            shape = [200000, 4]
+            shape = [4, 10, 4, 4]

             def compute_v1(x):
                 with fluid.dygraph.guard(p):
                     bn = fluid.dygraph.BatchNorm(shape[1])
+                    #bn = paddle.nn.BatchNorm2D(shape[1])
                     x1 = paddle.to_tensor(x)
                     x1.stop_gradient = False
                     y = bn(x1)
@@ -135,7 +143,8 @@ class TestBatchNorm(unittest.TestCase):
             def compute_v2(x):
                 with fluid.dygraph.guard(p):
                     with _test_eager_guard():
-                        bn = paddle.nn.BatchNorm1D(shape[1])
+                        print("v2")
+                        bn = paddle.nn.BatchNorm2D(shape[1])
                         x1 = paddle.to_tensor(x)
                         x1.stop_gradient = False
                         y = bn(x1)
...