Unverified commit ebff68fa, authored by Yang Zhang, committed by GitHub

Add float16 support to `sync_batch_norm_op` (#19681)

* Add float16 support to `sync_batch_norm_op`

test=develop

* Add test for sync_bn with FP16 input

test=develop
Parent 039b9710
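In outline, the new test builds the usual conv2d + batch_norm + sigmoid + reduce_sum tower with FP16 input, routes the convolution through cuDNN, and casts the batch-norm output back to float64 before the loss, so only the conv/BN path actually runs in half precision. A condensed, illustrative sketch of that pattern (assuming a CUDA-enabled build of paddle.fluid; the names and shapes here are examples, not part of the commit):

    import paddle.fluid as fluid

    main, startup = fluid.Program(), fluid.Program()
    with fluid.program_guard(main, startup):
        # FP16 input; cuDNN is enabled for conv2d in the half-precision case.
        data = fluid.layers.data(
            name='input', shape=[16, 64, 32], dtype='float16')
        conv = fluid.layers.conv2d(
            data, num_filters=16, filter_size=1, use_cudnn=True)
        bn = fluid.layers.batch_norm(conv, data_layout='NCHW')
        # Upcast before sigmoid/reduce_sum so the loss accumulates in FP64.
        bn64 = fluid.layers.cast(bn, 'float64')
        out = fluid.layers.reduce_sum(fluid.layers.sigmoid(bn64))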
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test for the sync_batch_norm op,
for both FP64 and FP16 input.
"""
from __future__ import print_function
@@ -22,9 +26,24 @@ import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler
from op_test import OpTest
def create_or_get_tensor(scope, var_name, var, place):
"""Get tensor, if not found, create a new one."""
tensor = scope.var(var_name).get_tensor()
if var is not None:
assert isinstance(var, np.ndarray)
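# An empty recursive-sequence-lengths list marks this as a plain dense (non-LoD) tensor.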
tensor.set_recursive_sequence_lengths([])
tensor.set(var, place)
return tensor
class TestSyncBatchNormOpTraining(unittest.TestCase):
"""sync_batch_norm op test."""
def setUp(self):
"""Setup."""
# FP64 by default; TestFP16SyncBatchNormOpTraining below overrides this.
self.dtype = np.float64
self.N = 32
@@ -32,17 +51,20 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
self.H = 64
self.W = 32
self.dshape = [self.N, self.C, self.H, self.W]
self.atol = 1e-3
def build_program(self,
place,
layout,
seed,
sync_bn=False,
only_forward=False):
def _build_program(self,
place,
layout,
seed,
sync_bn=False,
only_forward=False):
"""Build program."""
main = fluid.Program()
startup = fluid.Program()
main.random_seed = seed
startup.random_seed = seed
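# Route conv2d through cuDNN only for FP16 input; FP32/FP64 keep the default path.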
use_cudnn = self.dtype == np.float16
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
data = fluid.layers.data(
@@ -56,7 +78,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
filter_size=1,
param_attr=fluid.ParamAttr(name='conv2d_weight'),
bias_attr=False,
use_cudnn=False)
use_cudnn=use_cudnn)
bn = fluid.layers.batch_norm(
conv,
param_attr=fluid.ParamAttr(name='bn_scale'),
@@ -65,6 +87,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
moving_variance_name='bn_moving_variance',
data_layout=layout,
is_test=only_forward)
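# Upcast the BN output (a no-op for FP64 input) so sigmoid, reduce_sum,
# and the comparisons below all run at full precision.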
bn = fluid.layers.cast(bn, 'float64')
sigmoid = fluid.layers.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid)
if not sync_bn:
@@ -74,13 +97,18 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
sgd_opt.backward(out)
return main, startup, [out, conv, bn]
def compare(self, place, layout, only_forward):
def _compare(self, place, layout, only_forward):
"""Compare results."""
seed = 10
os.environ['FLAGS_cudnn_deterministic'] = "1"
scope = core.Scope()
data = np.random.random(size=self.dshape).astype(self.dtype) * 4. - 2
data = create_or_get_tensor(scope, "input",
OpTest.np_dtype_to_fluid_dtype(data), place)
# Single-GPU, N = 32 per GPU
main, startup, outs = self.build_program(place, layout, seed, False,
only_forward)
main, startup, outs = self._build_program(place, layout, seed, False,
only_forward)
exe = fluid.Executor(place)
exe.run(startup)
fetch_names = [v.name for v in outs] + [
@@ -99,8 +127,8 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
#####################################################################
# Multi-GPUs, self.N / core.get_cuda_device_count() per GPU
assert core.get_cuda_device_count() > 1
main, startup, outs = self.build_program(place, layout, seed, True,
only_forward)
main, startup, outs = self._build_program(place, layout, seed, True,
only_forward)
exe = fluid.Executor(place)
exe.run(startup)
fetch_names = [v.name for v in outs] + [
@@ -133,27 +161,43 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
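# Multi-GPU fetches may come back concatenated across devices;
# truncate to the single-GPU shape before comparing.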
sync_bn_val = sync_bn_val[:bn_val.shape[0]]
self.assertTrue(
np.allclose(
bn_val, sync_bn_val, atol=1e-3),
bn_val, sync_bn_val, atol=self.atol),
"Output (" + fetch_names[i] + ") has diff. \n" + "\nBN " +
str(bn_val) + "\n" + "Sync BN " + str(sync_bn_val))
def test_train(self):
"""Test training."""
if not core.is_compiled_with_cuda():
return
places = [core.CUDAPlace(0)]
for place in places:
for layout in ["NCHW", "NHWC"]:
self.compare(place, layout, False)
self._compare(place, layout, False)
def test_infer(self):
"""Test inference."""
if not core.is_compiled_with_cuda():
return
places = [core.CUDAPlace(0)]
for place in places:
for layout in ["NCHW", "NHWC"]:
self.compare(place, layout, True)
self._compare(place, layout, True)
class TestFP16SyncBatchNormOpTraining(TestSyncBatchNormOpTraining):
"""sync_batch_norm op test for FP16 input."""
def setUp(self):
"""Setup."""
self.dtype = np.float16
self.N = 32
self.C = 16
self.H = 64
self.W = 32
self.dshape = [self.N, self.C, self.H, self.W]
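# FP16 carries more rounding error, so use a looser tolerance than the FP64 default of 1e-3.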
self.atol = 1e-2
if __name__ == '__main__':
......