import numpy as np
import pytest

import megengine as mge
from megengine.amp import GradScaler
from megengine.autodiff import GradManager
from megengine.jit import trace


@pytest.mark.parametrize(
    "is_trace",
    [False, True],
)
def test_grad_scaler(is_trace):
    gm = GradManager()
    scaler = GradScaler()

    def f(idx, data, calc):
        x = mge.tensor(data, no_cache=True)
        y = mge.tensor(data, no_cache=True)

        if is_trace:
            calc = trace(calc)

        gm.attach([x, y])
        with gm:
            loss = calc(x, y)
            # With unscale_grad=False, the grads still carry the loss-scaling
            # factor after backward().
            scaler.backward(gm, loss, unscale_grad=False)
        np.testing.assert_equal(x.grad.numpy(), 2 * scaler.scale_factor)
        # Only tensors that actually received a grad can be unscaled;
        # single_variable below leaves y.grad as None.
        scaler.unscale(filter(lambda t: t.grad is not None, gm.attached_tensors()))
        # scaler.unscale(gm.attached_tensors())
        np.testing.assert_equal(x.grad.numpy(), 2)

    # dloss/dx == 2 for every calc function below.
    def double_variables(x, y):
        z = x + 2 * y
        loss = 2 * z + 1
        return loss

    def single_variable(x, y):
        z = x + 1
        loss = 2 * z + 1
        return loss

    # x and y receive identical grads here, so unscale must either give each
    # grad its own storage or avoid modifying grads in place.
    def double_variables_with_same_grad(x, y):
        z = x + y
        loss = 2 * z + 1
        return loss

    for data in [np.random.random((1, 2, 3, 4)), 1.0]:
        for calc in [
            double_variables,
            single_variable,
            double_variables_with_same_grad,
        ]:
            for idx in range(3):
                f(idx, data, calc)
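

# The test above exercises GradScaler piece by piece; the sketch below shows
# the end-to-end AMP flow those pieces belong to. It is an illustrative
# assumption, not part of the original test: the toy Linear model, data, and
# hyperparameters are made up, while autocast, GradScaler.backward (which by
# default scales the loss before backprop and unscales the grads afterwards),
# SGD, and square_loss are standard MegEngine APIs.
def _amp_training_step_sketch():
    import megengine.functional as F
    import megengine.module as M
    import megengine.optimizer as optim
    from megengine import amp

    model = M.Linear(4, 1)  # hypothetical toy model
    opt = optim.SGD(model.parameters(), lr=0.1)
    gm = GradManager().attach(model.parameters())
    scaler = GradScaler()

    inputs = mge.tensor(np.random.random((8, 4)).astype("float32"))
    labels = mge.tensor(np.random.random((8, 1)).astype("float32"))

    with gm:
        with amp.autocast():
            loss = F.loss.square_loss(model(inputs), labels)
        # Unlike the test, unscale_grad defaults to True here, so the grads
        # are already divided by scale_factor when backward() returns.
        scaler.backward(gm, loss)
    opt.step().clear_grad()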