import numpy as np

import megengine.functional as F
import megengine.jit as jit
import megengine.tensor as tensor
from megengine.autodiff.grad_manager import GradManager


def test_dropout():
    def check_dropout(mge_val, xla_val, drop_prob):
        # The fraction of zeroed elements should match the requested drop
        # probability (a statistical check, hence the loose tolerance).
        nr_zero = np.sum(np.array(xla_val == 0, np.uint32))
        nr_el = np.prod(xla_val.shape)
        xla_drop_rate = nr_zero * 1.0 / nr_el
        np.testing.assert_allclose(drop_prob, xla_drop_rate, atol=1e-3)

        # The two runs draw independent dropout masks, so only the elements
        # that survived in both outputs are compared; those must agree up to
        # floating-point error.
        mge_mask = mge_val == 0
        xla_mask = xla_val == 0
        both_mask = np.bitwise_or(xla_mask, mge_mask)
        both_left = np.bitwise_not(both_mask)
        mge_left = mge_val * both_left
        xla_left = xla_val * both_left
        np.testing.assert_allclose(mge_left, xla_left, atol=1e-6)

    def tester(shape, drop_prob, dtype=None):
        dtype = dtype or np.float32
        x = tensor(np.random.randn(*shape), dtype=dtype)
        dy = tensor(np.random.randn(*shape), dtype=dtype)

        gm = GradManager()

        @jit.trace(without_host=True, use_xla=True)
        def func(x, dy):
            gm.attach([x])
            with gm:
                y = F.dropout(x, drop_prob, True)
                gm.backward(y, dy)
            return y, x.grad

        # The first call traces and runs through the regular MegEngine path;
        # the second call runs the XLA-compiled program (hence the names).
        # Both the forward output and the input gradient are checked.
        mge_rsts = func(x, dy)
        xla_rsts = func(x, dy)
        for mge_rst, xla_rst in zip(mge_rsts, xla_rsts):
            check_dropout(mge_rst.numpy(), xla_rst.numpy(), drop_prob)

    tester((32, 128, 128, 1, 16), 0.1)
    tester((32, 128, 128, 1, 16), 0.3)
    tester((32, 128, 128, 1, 16), 0.5)
    tester((32, 128, 128, 1, 16), 0.9)


if __name__ == "__main__":
    test_dropout()
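

# Why atol=1e-3 on the drop rate is a reasonable tolerance -- a minimal,
# back-of-the-envelope sketch, not part of the test above (the helper name
# below is illustrative only).  Assuming an i.i.d. Bernoulli(p) drop mask,
# the empirical drop rate over n elements has standard deviation
# sqrt(p * (1 - p) / n).  For the shape used above, n = 32*128*128*1*16
# = 8388608, so 1e-3 is roughly a 6-sigma bound even in the worst case p = 0.5.
def _expected_drop_rate_sigma(p, n=32 * 128 * 128 * 1 * 16):
    # Standard deviation of the empirical drop rate for an i.i.d. Bernoulli(p)
    # mask over n elements.
    return float(np.sqrt(p * (1.0 - p) / n))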