Unverified · Commit ebff68fa · authored by Yang Zhang, committed by GitHub

Add float16 support to `sync_batch_norm_op` (#19681)

* Add float16 support to `sync_batch_norm_op`

test=develop

* Add test for sync_bn with FP16 input

test=develop
Parent 039b9710
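
As a rough sketch of what the new FP16 path exercises (illustrative only, not part of this commit; the variable names, shapes, and learning rate are made up), FP16 data can be pushed through conv2d + batch_norm and the program compiled with `build_strategy.sync_batch_norm = True`, which swaps batch_norm for sync_batch_norm across GPUs. This assumes a CUDA build with more than one GPU:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import compiler

# Build a small FP16 program: conv2d + batch_norm, then cast back to a wide
# dtype before the reduction, mirroring the pattern used by the test below.
main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.data(name='x', shape=[16, 64, 32], dtype='float16')
    conv = fluid.layers.conv2d(
        x, num_filters=16, filter_size=1, bias_attr=False, use_cudnn=True)
    bn = fluid.layers.batch_norm(conv, data_layout='NCHW')
    out = fluid.layers.reduce_sum(fluid.layers.cast(bn, 'float64'))
    fluid.optimizer.SGD(learning_rate=0.0).backward(out)

# Compile for data parallelism with synchronized batch norm statistics.
build_strategy = fluid.BuildStrategy()
build_strategy.sync_batch_norm = True
binary = compiler.CompiledProgram(main).with_data_parallel(
    loss_name=out.name, build_strategy=build_strategy)

exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(startup)
feed = np.random.random([8, 16, 64, 32]).astype('float16')
out_v, = exe.run(binary, feed={'x': feed}, fetch_list=[out.name])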
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test for the sync batchnorm op,
for both FP64 and FP16 input.
"""
from __future__ import print_function
@@ -22,9 +26,24 @@ import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler

from op_test import OpTest


def create_or_get_tensor(scope, var_name, var, place):
    """Get tensor, if not found, create a new one."""
    tensor = scope.var(var_name).get_tensor()
    if var is not None:
        assert isinstance(var, np.ndarray)
        tensor.set_recursive_sequence_lengths([])
        tensor.set(var, place)
    return tensor


class TestSyncBatchNormOpTraining(unittest.TestCase):
    """sync_batch_norm op test."""

    def setUp(self):
        """Setup."""
        #self.dtype = np.float32
        self.dtype = np.float64
        self.N = 32
@@ -32,17 +51,20 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
        self.H = 64
        self.W = 32
        self.dshape = [self.N, self.C, self.H, self.W]
        self.atol = 1e-3

    def _build_program(self,
                       place,
                       layout,
                       seed,
                       sync_bn=False,
                       only_forward=False):
        """Build program."""
        main = fluid.Program()
        startup = fluid.Program()
        main.random_seed = seed
        startup.random_seed = seed
        use_cudnn = self.dtype == np.float16
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                data = fluid.layers.data(
@@ -56,7 +78,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
                    filter_size=1,
                    param_attr=fluid.ParamAttr(name='conv2d_weight'),
                    bias_attr=False,
                    use_cudnn=use_cudnn)
                bn = fluid.layers.batch_norm(
                    conv,
                    param_attr=fluid.ParamAttr(name='bn_scale'),
@@ -65,6 +87,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
                    moving_variance_name='bn_moving_variance',
                    data_layout=layout,
                    is_test=only_forward)
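                # Cast the normalized output up to FP64 so the ops that follow
                # (and the later comparison) run at the same precision for both
                # the FP64 and FP16 variants of this test.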
                bn = fluid.layers.cast(bn, 'float64')
                sigmoid = fluid.layers.sigmoid(bn)
                out = fluid.layers.reduce_sum(sigmoid)
                if not sync_bn:
@@ -74,13 +97,18 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
                    sgd_opt.backward(out)
        return main, startup, [out, conv, bn]

    def _compare(self, place, layout, only_forward):
        """Compare results."""
        seed = 10
        os.environ['FLAGS_cudnn_deterministic'] = "1"
        scope = core.Scope()
        data = np.random.random(size=self.dshape).astype(self.dtype) * 4. - 2
        data = create_or_get_tensor(scope, "input",
                                    OpTest.np_dtype_to_fluid_dtype(data), place)

        # Single-GPU, N = 32 per GPU
        main, startup, outs = self._build_program(place, layout, seed, False,
                                                  only_forward)
        exe = fluid.Executor(place)
        exe.run(startup)
        fetch_names = [v.name for v in outs] + [
@@ -99,8 +127,8 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
        #####################################################################
        # Multi-GPUs, self.N / core.get_cuda_device_count() per GPU
        assert core.get_cuda_device_count() > 1
        main, startup, outs = self._build_program(place, layout, seed, True,
                                                  only_forward)
        exe = fluid.Executor(place)
        exe.run(startup)
        fetch_names = [v.name for v in outs] + [
@@ -133,27 +161,43 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
                sync_bn_val = sync_bn_val[:bn_val.shape[0]]
            self.assertTrue(
                np.allclose(
                    bn_val, sync_bn_val, atol=self.atol),
                "Output (" + fetch_names[i] + ") has diff. \n" + "\nBN " +
                str(bn_val) + "\n" + "Sync BN " + str(sync_bn_val))

    def test_train(self):
        """Test training."""
        if not core.is_compiled_with_cuda():
            return
        places = [core.CUDAPlace(0)]
        for place in places:
            for layout in ["NCHW", "NHWC"]:
                self._compare(place, layout, False)

    def test_infer(self):
        """Test inference."""
        if not core.is_compiled_with_cuda():
            return
        places = [core.CUDAPlace(0)]
        for place in places:
            for layout in ["NCHW", "NHWC"]:
                self._compare(place, layout, True)


class TestFP16SyncBatchNormOpTraining(TestSyncBatchNormOpTraining):
    """sync_batch_norm op test for FP16 input."""

    def setUp(self):
        """Setup."""
        self.dtype = np.float16
        self.N = 32
        self.C = 16
        self.H = 64
        self.W = 32
        self.dshape = [self.N, self.C, self.H, self.W]
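        # FP16 carries far less precision than FP64, so the comparison against
        # plain batch_norm uses a looser tolerance than the base class (1e-3).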
        self.atol = 1e-2


if __name__ == '__main__':
...