# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import inspect
import io
import itertools
from tempfile import mkstemp

import numpy as np
import pytest

import megengine.core.tensor.megbrain_graph as G
import megengine.functional as F
import megengine.optimizer as optim
import megengine.utils.comp_graph_tools as cgtools
from megengine import Parameter, tensor
from megengine.autodiff import GradManager
from megengine.core._trace_option import set_symbolic_shape
from megengine.core.ops import builtin as ops
from megengine.core.ops.builtin import Elemwise
from megengine.core.tensor.utils import isscalar
from megengine.functional import exp, log
from megengine.jit import GraphOptimizationConfig, exclude_from_trace, trace
from megengine.module import Module
from megengine.random import normal, uniform
from megengine.utils.naming import AutoNaming


@pytest.mark.parametrize("trace_mode", [False, True])
@pytest.mark.parametrize("return_mode", ["Value", "Tuple", "List", "Dict"])
def test_trace(trace_mode, return_mode):
    @trace(symbolic=trace_mode)
    def f(x):
        if return_mode == "Tuple":
            return (-x,)
        elif return_mode == "List":
            return [-x]
        elif return_mode == "Dict":
            return {"neg": -x}
        else:
            return -x

    def get_numpy(y):
        if return_mode == "Tuple" or return_mode == "List":
            return y[0].numpy()
        elif return_mode == "Dict":
            return y["neg"].numpy()
        return y.numpy()

    x = tensor([1])
    y = get_numpy(f(x))

    for i in range(3):
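        # replaying the compiled trace should keep producing the eager-mode result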
        np.testing.assert_equal(get_numpy(f(x)), y)


def test_output_copy_trace():
    class Simple(Module):
        def __init__(self):
            super().__init__()
            self.a = Parameter([1.0], dtype=np.float32)

        def forward(self, x):
            x = x * self.a
            # results in a copy of the output during gradient computation
            x = F.exp(x)
            return x

    ys = {False: [], True: []}

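    # run the same training step imperatively (False) and symbolically (True),
    # then check that the per-step losses match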
    for symbolic in [False, True]:
        net = Simple()
        gm = GradManager().attach(net.parameters())
        opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
        data = tensor(np.arange(4).reshape(2, 2), dtype="float32")

        @trace(symbolic=symbolic)
        def train_func(d):
            with gm:
                loss = net(d)
                gm.backward(loss)
                opt.step().clear_grad()
            return loss

        for i in range(3):
            y = train_func(data).numpy()
            ys[symbolic].append(y)

    for i in range(3):
        np.testing.assert_equal(ys[False][i], ys[True][i])


@pytest.mark.parametrize("trace_mode", [False, True])
def test_exclude_from_trace(trace_mode):
    @trace(symbolic=trace_mode)
    def f(x):
        x = -x
        with exclude_from_trace():
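            # code under exclude_from_trace() executes eagerly on every call, so
            # the extra negation can depend on i without invalidating the trace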
            if i % 2:
                x = -x
        x = -x
        return x

    x = tensor([1])

    for i in range(3):
        y = f(x).numpy()
        np.testing.assert_equal(f(x).numpy(), y)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse(trace_mode):
    # explicitly set opt_level to 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f(a, b):
        base = 0
        c = b - a
        _, idx = F.topk(c, 3)
        # internally, biased_idx will alias idx, since gopt elides the addition of zero
        biased_idx = base + idx
        return biased_idx

    a = tensor(np.ones((7, 2)), dtype=np.int32)
    b = tensor(2 * np.ones((7, 2)), dtype=np.float32)

    for i in range(3):
        y = f(a, b)
        y.numpy()


@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse_in_grad(trace_mode):
    w = Parameter(np.ones([4, 6]), dtype="float32")

    gm = GradManager().attach(w)
    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)

    # explicitly set opt_level to 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f():
        with gm:
            wm = F.sum(w ** 2, axis=1) ** 0.5
            loss = wm.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    for i in range(3):
        y = f()
        y.numpy()


def test_print_in_trace():
    for symbolic in [False]:  # cannot read value in symbolic mode

        @trace(symbolic=symbolic)
        def f(x):
            nonlocal buf
            x = -x
            buf = x.numpy()
            x = -x
            return x

        buf = None
        x = tensor([1])

        for i in range(3):
            y = f(x).numpy()
            z = buf
            buf = None
            np.testing.assert_equal(f(x).numpy(), y)
            np.testing.assert_equal(z, buf)


def test_dump():
    @trace(symbolic=True, capture_as_const=True)
    def f(a, b):
        return a + b

    # clear any naming scope left over from a previous exception test
    AutoNaming.clear()
    a = tensor([2])
    b = tensor([4])
    y = f(a, b).numpy()

    for i in range(3):
        np.testing.assert_equal(f(a, b).numpy(), y)

    file = io.BytesIO()
    dump_info = f.dump(file)
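    # 3 oprs expected in the dump: presumably the two input oprs (arg_0, arg_1)
    # plus the ADD elemwise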
    assert dump_info.nr_opr == 3
    np.testing.assert_equal(dump_info.inputs, ["arg_0", "arg_1"])
    np.testing.assert_equal(dump_info.outputs, ["ADD"])
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(a, b)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_capture_dump():
    a = tensor([2])
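    # `a` is captured from the enclosing scope; with capture_as_const=True it
    # should be baked into the dumped graph as a constant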

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * a

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file)
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(x)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_dump_volatile():
    p = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * p

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file, optimize_for_inference=False)
    file.seek(0)
    (out,) = G.load_graph(file).output_vars_list
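    # the second input of the multiply is the captured p, which
    # capture_as_const should have serialized as an ImmutableTensor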
    assert (
        cgtools.get_owner_opr_type(cgtools.get_owner_opr_inputs(out)[1])
        == "ImmutableTensor"
    )


def test_dump_backward_graph():
    x0 = tensor(np.random.randn(3, 4))
    x1 = tensor(np.random.randn(3, 4))

    gm = GradManager().attach(x0)

    @trace(symbolic=True, capture_as_const=True)
    def f(x0, x1):
        with gm:
            y = x0 * x1
            gm.backward(y, F.ones_like(y))
            dx0 = x0.grad
        return y, dx0

    y, dx0 = f(x0, x1)
    np.testing.assert_equal(dx0.numpy(), x1)

    file = io.BytesIO()
    f.dump(file, optimize_for_inference=False)
    file.seek(0)

    infer_cg = cgtools.GraphInference(file)
    results = list((infer_cg.run(x0, x1)).values())

    np.testing.assert_equal(results[0], y)
    np.testing.assert_equal(results[1], dx0)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_profiler(trace_mode):
    @trace(symbolic=trace_mode, profiling=True)
    def f(x):
        return -x

    x = tensor([1])
    y = f(x).numpy()

    f(x)
    f(x)  # XXX: has to run twice

    out = f.get_profile()
    assert out.get("profiler")


def test_goptions():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        # directly returning x / x would not trigger gopt,
        # since there is no way to tell that the two x are the same
        y = 2.0 * x
        return y / y

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        y = 2.0 * x
        return y / y

    d = tensor(0.0)
    assert not np.isfinite(f(d).numpy())
    np.testing.assert_equal(g(d).numpy().item(), 1.0)


def test_goptions_log_sum_exp():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x, y):
        return log(exp(x) + exp(y))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x, y):
        return log(exp(x) + exp(y))

    val = 1.0e4
    d = tensor(val)
    o = tensor(0.0)
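    # exp(1e4) overflows to inf in fp32, so the unoptimized graph yields a
    # non-finite result; opt_level=1 presumably rewrites the expression into a
    # numerically stable log-sum-exp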
    assert not np.isfinite(f(d, o).numpy())
    np.testing.assert_almost_equal(g(d, o), val)


def test_goptions_log_exp():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        return log(exp(x))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        return log(exp(x))

    f(tensor(1.0))
    _, out = mkstemp()
    f.dump(out, optimize_for_inference=False)
    outputs = G.load_graph(out).output_vars_list
    oprs_1 = cgtools.get_oprs_seq(outputs)

    g(tensor(1.0))
    g.dump(out, optimize_for_inference=False)
    outputs = G.load_graph(out).output_vars_list
    oprs_2 = cgtools.get_oprs_seq(outputs)

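    # opt_level=1 presumably elides the matched log/exp pair, leaving two fewer oprs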
    assert len(oprs_1) - len(oprs_2) == 2


def test_optimize_for_inference():
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    _, out = mkstemp()
    f(tensor(5.0))
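    # enable_io16xc32: float16 I/O between oprs with float32 compute, hence the
    # float16 input dtype checked below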
    f.dump(out, enable_io16xc32=True)

    res = G.load_graph(out)
    computing_input = res.output_vars_list[0].owner.inputs[0]
    assert computing_input.dtype == np.float16


def test_optimize_for_inference_broadcast():
    a = tensor(np.ones(1, dtype=np.float32))

    @trace(capture_as_const=True, symbolic_shape=True)
    def f():
        return a._broadcast(tensor([1, 10], dtype=np.int32))

    f()
    f.dump(io.BytesIO())


def test_trace_cvt_bool():
    x = tensor([0], dtype=np.int32)

    @trace(symbolic=True)
    def f(x):
        a = x.shape
        b = a[0]
        assert isscalar(b)
        return b == 0

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), False)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_reshape(trace_mode):
    x1 = tensor(np.random.randn(2, 10, 10))
    x2 = tensor(np.random.randn(4, 10, 10))
    x3 = tensor(np.random.randn(8, 10, 10))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = x.reshape(x.shape[0], 100)
        return y

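    # each distinct input shape presumably triggers a re-trace instead of an error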
    f(x1)
    f(x2)
    f(x3)


def test_trace_topk():
    x = tensor([5, 2, 7, 1, 0, 3, 2])

    @trace(symbolic=True)
    def f(x):
        y = F.topk(x, 3)
        np.testing.assert_equal(y[0].shape.numpy(), np.array([3,]))
        return y

    for i in range(3):
        f(x)


def test_trace_warp_perspective():
    inp_shape = (1, 1, 4, 4)
    x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
    M_shape = (1, 3, 3)
    M = tensor(
        np.array(
            [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ).reshape(M_shape)
    )

    @trace(symbolic=True)
    def f(x, M):
        out = F.vision.warp_perspective(x, M, (2, 2))
        np.testing.assert_equal(out.shape.numpy(), np.array([1, 1, 2, 2]))
        return out

    for i in range(3):
        f(x, M)


def test_raise_on_trace():
    step_count = 0
    catch_count = 0
    bad_step = 10

    class CatchMe(Exception):
        pass

    a = tensor([1, 2, 3, 4])
    b = tensor([5, 6, 7, 8])
    c = tensor([9, 0, 1, 2])

    @trace
    def add_abc(a, b, c):
        ps = a + b
        result = ps + c
        if step_count == bad_step:
            raise CatchMe("catch me")
        return result

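    # the exception raised at bad_step must not corrupt the compiled trace;
    # every other iteration should still compute a + b + c correctly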
    for i in range(100):
        try:
            d = add_abc(a, b, c)
        except CatchMe as e:
            catch_count += 1
        else:
            np.testing.assert_equal(d.numpy(), (a + b + c).numpy())
        step_count += 1

    assert catch_count == 1


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_broadcast(trace_mode):
    x1 = tensor(np.random.randn(3, 1, 1))
    x2 = tensor(np.random.randn(1, 4, 1))
    x3 = tensor(np.random.randn(1, 1, 5))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = F.broadcast_to(x, (3, 4, 5))
        return y

    f(x1)
    f(x2)
    f(x3)


def test_trace_nms():
    def make_inputs(n):
        boxes = np.zeros((n, 4))
        boxes[:, :2] = np.random.rand(n, 2) * 100
        boxes[:, 2:] = np.random.rand(n, 2) * 100 + 100

        scores = np.random.rand(n)

        return tensor(boxes), tensor(scores)

    @trace(symbolic=False)
    def f(boxes, scores):
        # with tracing, max_output must be specified
        results = F.vision.nms(boxes, scores=scores, iou_thresh=0.5, max_output=20)
        # without tracing, max_output can be inferred inside nms
        with exclude_from_trace():
            _ = F.vision.nms(boxes, scores=scores, iou_thresh=0.5)
        return results

    f(*make_inputs(10))
    f(*make_inputs(20))
    f(*make_inputs(30))


def test_trace_valid_broadcast():
    x1 = tensor(np.random.randn(1, 1))
    x2 = tensor(np.random.randn(1, 2))
    shape = (tensor([2]), tensor([2]))

    @trace(symbolic=False)
    def f(x, shape):
        y = F.broadcast_to(x, shape)
        return y

    f(x1, shape)
    f(x2, shape)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_clip(trace_mode):
    x = tensor(np.random.randn(10, 10))

    @trace(symbolic=trace_mode)
    def f(x, lower, upper):
        y = F.clip(x, lower, upper)
        return y

    for i in range(3):
        f(x, tensor([0]), tensor([1]))

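    # degenerate bounds (lower > upper) should still trace and execute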
    for i in range(3):
        f(x, tensor([5]), tensor([4]))


# test returning a noncontiguous tensor from trace
def test_slice():
    @trace
    def f(x):
        return x[:, 1::2]

    x = F.arange(8).reshape(2, 4)
    f(x)
    y = f(x)
    np.testing.assert_array_equal(y.numpy(), x.numpy()[:, 1::2])
    y + y  # the noncontiguous output must remain usable in further computation


@pytest.mark.parametrize("shape_mode", [False, True])
def test_random(shape_mode):
    def run_test(op):
        @trace(symbolic=True, symbolic_shape=shape_mode)
        def f():
            out = op(size=[10, 10])
            out_shape = out.shape
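            # with symbolic_shape=True, .shape is a tensor rather than a plain
            # tuple, but it must still be readable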
            assert out_shape is not None
            if not isinstance(out_shape, tuple):
                assert out.shape.numpy() is not None
            return out

        for _ in range(3):
            f()

    run_test(uniform)
    run_test(normal)


@pytest.mark.parametrize("shape_mode", [False, True])
def test_trace_advance_indexing(shape_mode):
    funcs = [
        lambda x, i: x[i],
        lambda x, i, j: x[i, j],
        lambda x, i, j: x[i, :, j, ...],
        lambda x, start, end: x[start:end],
        lambda x, start, end: x[:, 0, start:end, ..., 1],
        lambda x, vec: x[vec],
        lambda x, vec: x[vec, ..., 0, 1:3],
        lambda x, vec: x[vec, vec[0], vec[1]],
        # lambda x, i, start, end, vec: x[i, ..., :, vec, start:end],  # FIXME
        lambda x, mask: x[mask],
    ]

    inputs = {
        "x": np.random.randn(5, 5, 5, 5, 5).astype("float32"),
        "i": 4,
        "j": 2,
        "start": 1,
        "end": 3,
        "vec": [1, 2, 3],
        "mask": np.random.randn(5, 5, 5, 5, 5) >= 0,
    }
    for f in funcs:
        sig = inspect.signature(f)
        param_names = list(sig.parameters.keys())
        params = {}
        params_np = {}
        f_traced = trace(f, symbolic=False, symbolic_shape=shape_mode)
        for name in param_names:
            params[name] = tensor(inputs[name])
            params_np[name] = inputs[name]
        expected = f(**params_np)
        result_imperative = f(**params)
        np.testing.assert_equal(expected, result_imperative.numpy())
        for _ in range(3):
            result_trace = f_traced(**params)
            np.testing.assert_equal(expected, result_trace.numpy())


@pytest.mark.require_ngpu(1)  # nvrtc backend
def test_trace_jit_config():
    def run(fuse_dimshuffle, fuse_reduce):
        config = GraphOptimizationConfig()
        config.jit_fuse_dimshuffle = fuse_dimshuffle
        config.jit_fuse_reduce = fuse_reduce

        # set opt_level = 1 to avoid fusing dimshuffle and reduce at the same time
        @trace(opt_level=1, graph_opt_config=config)
        def func(x):
            return x + 1

        x = tensor(2)
        y = func(x)
        func._compile()

        options = func._graph.options
        mapping = {None: 0, False: 1, True: 2}
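        # assumed tri-state encoding in the jit config: 0 = unset, 1 = disabled, 2 = enabled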
        assert options.graph_opt.jit == 0
        assert options.graph_opt.jit_config.fuse_dimshuffle == mapping[fuse_dimshuffle]
        assert options.graph_opt.jit_config.fuse_reduce == mapping[fuse_reduce]

    for fuse_dimshuffle in [None, False, True]:
        for fuse_reduce in [None, False, True]:
            run(fuse_dimshuffle, fuse_reduce)