# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import inspect
import io
import itertools
from tempfile import mkstemp

import numpy as np
import pytest

import megengine.core.tensor.megbrain_graph as G
import megengine.functional as F
import megengine.optimizer as optim
import megengine.utils.comp_graph_tools as cgtools
from megengine import Parameter, tensor
from megengine.autodiff import GradManager
from megengine.core._trace_option import set_symbolic_shape
from megengine.core.ops import builtin as ops
from megengine.core.ops.builtin import Elemwise
from megengine.core.tensor.utils import isscalar
from megengine.functional import exp, log
from megengine.jit import GraphOptimizationConfig, exclude_from_trace, trace
from megengine.module import Module
from megengine.random import normal, uniform
from megengine.utils.naming import AutoNaming


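# trace the negation op with every supported return container (bare value,
# tuple, list, dict); the traced result must stay equal across repeated calls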
@pytest.mark.parametrize("trace_mode", [False, True])
@pytest.mark.parametrize("return_mode", ["Value", "Tuple", "List", "Dict"])
def test_trace(trace_mode, return_mode):
    @trace(symbolic=trace_mode)
    def f(x):
        if return_mode == "Tuple":
            return (-x,)
        elif return_mode == "List":
            return [-x]
        elif return_mode == "Dict":
            return {"neg": -x}
        else:
            return -x

    def get_numpy(y):
        if return_mode == "Tuple" or return_mode == "List":
            return y[0].numpy()
        elif return_mode == "Dict":
            return y["neg"].numpy()
        return y.numpy()

    x = tensor([1])
    y = get_numpy(f(x))

    for i in range(3):
        np.testing.assert_equal(get_numpy(f(x)), y)


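# run the same SGD training step traced with symbolic=False and symbolic=True;
# both modes must produce identical losses on every iteration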
def test_output_copy_trace():
    class Simple(Module):
        def __init__(self):
            super().__init__()
            self.a = Parameter([1.0], dtype=np.float32)

        def forward(self, x):
            x = x * self.a
            # will result into a copy of output in grad
            x = F.exp(x)
            return x

    ys = {False: [], True: []}

    for symbolic in [False, True]:
        net = Simple()
        gm = GradManager().attach(net.parameters())
        opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
        data = tensor(np.arange(4).reshape(2, 2), dtype="float32")

        @trace(symbolic=symbolic)
        def train_func(d):
            with gm:
                loss = net(d)
                gm.backward(loss)
                opt.step().clear_grad()
            return loss

        for i in range(3):
            y = train_func(data).numpy()
            ys[symbolic].append(y)

    for i in range(3):
        np.testing.assert_equal(ys[False][i], ys[True][i])


@pytest.mark.parametrize("trace_mode", [False, True])
def test_tensor_detach(trace_mode):
    @trace(symbolic=trace_mode)
    def f(x):
        y = x.detach() ** 2
        z = y.detach() + 1
        return z.detach()

    x = tensor([1, 2, 3, 4])
    for _ in range(3):
        f(x).numpy()


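# code inside exclude_from_trace() is executed eagerly on every call instead of
# being recorded, so the data-dependent `if i % 2` branch stays correct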
@pytest.mark.parametrize("trace_mode", [False, True])
def test_exclude_from_trace(trace_mode):
    @trace(symbolic=trace_mode)
    def f(x):
        x = -x
        with exclude_from_trace():
            if i % 2:
                x = -x
        x = -x
        return x

    x = tensor([1])

    for i in range(3):
        y = f(x).numpy()
        np.testing.assert_equal(f(x).numpy(), y)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse(trace_mode):
    # explicitly declare opt_level as 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f(a, b):
        base = 0
        c = b - a
        _, idx = F.topk(c, 3)
        # internally, biased_idx will just be idx, since gopt folds away the zero addition
        biased_idx = base + idx
        return biased_idx

    a = tensor(np.ones((7, 2)), dtype=np.int32)
    b = tensor(2 * np.ones((7, 2)), dtype=np.float32)

    for i in range(3):
        y = f(a, b)
        y.numpy()


@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse_in_grad(trace_mode):
    w = Parameter(np.ones([4, 6]), dtype="float32")

    gm = GradManager().attach(w)
    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)

    # explicitly declare opt_level as 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f():
        with gm:
            wm = F.sum(w ** 2, axis=1) ** 0.5
            loss = wm.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    for i in range(3):
        y = f()
        y.numpy()


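# reading a value back with .numpy() inside a traced function is a side effect;
# check that it is replayed (buf is refreshed) on every traced call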
def test_print_in_trace():
    for symbolic in [False]:  # cannot read value in symbolic mode

        @trace(symbolic=symbolic)
        def f(x):
            nonlocal buf
            x = -x
            buf = x.numpy()
            x = -x
            return x

        buf = None
        x = tensor([1])

        for i in range(3):
            y = f(x).numpy()
            z = buf
            buf = None
            np.testing.assert_equal(f(x).numpy(), y)
            np.testing.assert_equal(z, buf)


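# dump the traced graph in the requested serialization format, then reload it
# with GraphInference and check it reproduces the traced output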
@pytest.mark.parametrize(
    "dump_format",
    [
        "FBS",
    ],
)
def test_dump(dump_format):
    @trace(symbolic=True, capture_as_const=True)
    def f(a, b):
        return a + b

    # clear any naming scope left behind by an earlier test that raised
    AutoNaming.clear()
    a = tensor([2])
    b = tensor([4])
    y = f(a, b).numpy()

    for i in range(3):
        np.testing.assert_equal(f(a, b).numpy(), y)

    file = io.BytesIO()
    dump_info = f.dump(file, dump_format=dump_format)
    assert dump_info.nr_opr == 3
    np.testing.assert_equal(dump_info.inputs, ["arg_0", "arg_1"])
    np.testing.assert_equal(dump_info.outputs, ["ADD"])
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(a, b)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_capture_dump():
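    # `a` is closed over rather than passed as an argument; with
    # capture_as_const=True it is baked into the dumped graph, so the reloaded
    # graph needs only `x` at inference time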
    a = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * a

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file)
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(x)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_dump_volatile():
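    # the captured tensor `p` should be serialized as an ImmutableTensor
    # operator, checked on the second input of the graph's output operator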
    p = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * p

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file, optimize_for_inference=False)
    file.seek(0)
    (out,) = G.load_graph(file).output_vars_list
    assert (
        cgtools.get_owner_opr_type(cgtools.get_owner_opr_inputs(out)[1])
        == "ImmutableTensor"
    )


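# dump a traced function whose graph contains backward (grad) operators and
# check the reloaded graph reproduces both the forward output and the gradient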
def test_dump_backward_graph():
    x0 = tensor(np.random.randn(3, 4))
    x1 = tensor(np.random.randn(3, 4))

    gm = GradManager().attach(x0)

    @trace(symbolic=True, capture_as_const=True)
    def f(x0, x1):
        with gm:
            y = x0 * x1
            gm.backward(y, F.ones_like(y))
            dx0 = x0.grad
        return y, dx0

    y, dx0 = f(x0, x1)
    np.testing.assert_equal(dx0.numpy(), x1)

    file = io.BytesIO()
    f.dump(file, optimize_for_inference=False)
    file.seek(0)

    infer_cg = cgtools.GraphInference(file)
    results = list((infer_cg.run(x0, x1)).values())

    np.testing.assert_equal(results[0], y)
    np.testing.assert_equal(results[1], dx0)


def test_dump_with_testcase():
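    # the "#rand(0, 255, 1)" spec asks dump() to attach a randomly generated
    # input testcase to the serialized model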
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    f(tensor(1.0))
    file = io.BytesIO()
    f.dump(file, input_data=["#rand(0, 255, 1)"])


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_profiler(trace_mode):
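    # profiling data only becomes available after the traced function has been
    # compiled and executed, hence the extra calls before get_profile()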
    @trace(symbolic=trace_mode, profiling=True)
    def f(x):
        return -x

    x = tensor([1])
    y = f(x).numpy()

    f(x)
    f(x)  # XXX: has to run twice

    out = f.get_profile()
    assert out.get("profiler")


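# at opt_level=0 the y / y division survives, so x = 0 yields a non-finite
# result; at opt_level=1 gopt folds y / y to 1 before the 0/0 can occur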
def test_goptions():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        # directly return x / x will not trigger gopt
        # since there's no way to tell the two x are the same
        y = 2.0 * x
        return y / y

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        y = 2.0 * x
        return y / y

    d = tensor(0.0)
    assert not np.isfinite(f(d).numpy())
    np.testing.assert_equal(g(d).numpy().item(), 1.0)


def test_goptions_log_sum_exp():
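    # exp(1e4) overflows at opt_level=0; at opt_level=1 gopt is expected to
    # rewrite log(exp(x) + exp(y)) into a numerically stable log-sum-exp form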
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x, y):
        return log(exp(x) + exp(y))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x, y):
        return log(exp(x) + exp(y))

    val = 1.0e4
    d = tensor(val)
    o = tensor(0.0)
    assert not np.isfinite(f(d, o).numpy())
    np.testing.assert_almost_equal(g(d, o), val)


def test_goptions_log_exp():
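    # at opt_level=1 log(exp(x)) should be folded away; the optimized dump must
    # contain exactly two fewer operators than the unoptimized one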
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        return log(exp(x))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        return log(exp(x))

    f(tensor(1.0))
    _, out = mkstemp()
    f.dump(out, optimize_for_inference=False)
    outputs = G.load_graph(out).output_vars_list
    oprs_1 = cgtools.get_oprs_seq(outputs)

    g(tensor(1.0))
    g.dump(out, optimize_for_inference=False)
    outputs = G.load_graph(out).output_vars_list
    oprs_2 = cgtools.get_oprs_seq(outputs)

    assert len(oprs_1) - len(oprs_2) == 2


def test_optimize_for_inference():
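    # enable_io16xc32 converts I/O to float16 while computing in float32, so
    # the input of the output operator must be float16 after dumping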
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    _, out = mkstemp()
    f(tensor(5.0))
    f.dump(out, enable_io16xc32=True)

    res = G.load_graph(out)
    computing_input = res.output_vars_list[0].owner.inputs[0]
    assert computing_input.dtype == np.float16


def test_optimize_for_inference_broadcast():
    a = tensor(np.ones(1, dtype=np.float32))

    @trace(capture_as_const=True, symbolic_shape=True)
    def f():
        return a._broadcast(tensor([1, 10], dtype=np.int32))

    f()
    f.dump(io.BytesIO())


def test_trace_cvt_bool():
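    # a single shape element is a scalar inside trace; comparing it against 0
    # must yield a correct boolean on every traced call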
    x = tensor([0], dtype=np.int32)

    @trace(symbolic=True)
    def f(x):
        a = x.shape
        b = a[0]
        assert isscalar(b)
        return b == 0

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), False)


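# a traced reshape that depends on the runtime shape (x.shape[0]) must keep
# working when inputs with different leading dimensions are passed in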
@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_reshape(trace_mode):
    x1 = tensor(np.random.randn(2, 10, 10))
    x2 = tensor(np.random.randn(4, 10, 10))
    x3 = tensor(np.random.randn(8, 10, 10))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = x.reshape(x.shape[0], 100)
        return y

    f(x1)
    f(x2)
    f(x3)


def test_trace_topk():
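    # the static shape of the traced topk result must already be known
    # (3 elements) while still inside the traced function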
    x = tensor([5, 2, 7, 1, 0, 3, 2])

    @trace(symbolic=True)
    def f(x):
        y = F.topk(x, 3)
        np.testing.assert_equal(y[0].shape.numpy(), np.array([3,]))
        return y

    for i in range(3):
        f(x)


def test_trace_warp_perspective():
    inp_shape = (1, 1, 4, 4)
    x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
    M_shape = (1, 3, 3)
    M = tensor(
        np.array(
            [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ).reshape(M_shape)
    )

    @trace(symbolic=True)
    def f(x, M):
        out = F.vision.warp_perspective(x, M, (2, 2))
        np.testing.assert_equal(out.shape.numpy(), np.array([1, 1, 2, 2]))
        return out

    for i in range(3):
        f(x, M)


def test_raise_on_trace():
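    # an exception raised inside a traced function must propagate exactly once
    # and leave the trace usable for the remaining iterations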
    step_count = 0
    catch_count = 0
    bad_step = 10

    class CatchMe(Exception):
        pass

    a = tensor([1, 2, 3, 4])
    b = tensor([5, 6, 7, 8])
    c = tensor([9, 0, 1, 2])

    @trace
    def add_abc(a, b, c):
        ps = a + b
        result = ps + c
        if step_count == bad_step:
            raise CatchMe("catch me")
        return result

    for i in range(100):
        try:
            d = add_abc(a, b, c)
        except CatchMe as e:
            catch_count += 1
        else:
            np.testing.assert_equal(d.numpy(), (a + b + c).numpy())
        step_count += 1

    assert catch_count == 1


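# broadcast_to traced with capture_as_const must keep working for inputs of
# several different broadcastable shapes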
@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_broadcast(trace_mode):
    x1 = tensor(np.random.randn(3, 1, 1))
    x2 = tensor(np.random.randn(1, 4, 1))
    x3 = tensor(np.random.randn(1, 1, 5))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = F.broadcast_to(x, (3, 4, 5))
        return y

    f(x1)
    f(x2)
    f(x3)


def test_trace_nms():
    def make_inputs(n):
        boxes = np.zeros((n, 4))
        boxes[:, :2] = np.random.rand(n, 2) * 100
        boxes[:, 2:] = np.random.rand(n, 2) * 100 + 100

        scores = np.random.rand(n)

        return tensor(boxes), tensor(scores)

    @trace(symbolic=False)
    def f(boxes, scores):
        # with tracing, max_output must be specified
        results = F.vision.nms(boxes, scores=scores, iou_thresh=0.5, max_output=20)
        # without tracing, max output can be inferred inside nms
        with exclude_from_trace():
            _ = F.vision.nms(boxes, scores=scores, iou_thresh=0.5)
        return results

    f(*make_inputs(10))
    f(*make_inputs(20))
    f(*make_inputs(30))


def test_trace_valid_broadcast():
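    # the broadcast target shape is supplied as a pair of tensors; the traced
    # function must keep accepting new inputs with valid broadcastable shapes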
    x1 = tensor(np.random.randn(1, 1))
    x2 = tensor(np.random.randn(1, 2))
    shape = (tensor([2]), tensor([2]))

    @trace(symbolic=False)
    def f(x, shape):
        y = F.broadcast_to(x, shape)
        return y

    f(x1, shape)
    f(x2, shape)


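# clip with tensor-valued bounds under trace; the second loop passes
# lower > upper to also exercise a degenerate clipping range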
@pytest.mark.parametrize("trace_mode", [False, True])
def test_clip(trace_mode):
    x = tensor(np.random.randn(10, 10))

    @trace(symbolic=trace_mode)
    def f(x, lower, upper):
        y = F.clip(x, lower, upper)
        return y

    for i in range(3):
        f(x, tensor([0]), tensor([1]))

    for i in range(3):
        f(x, tensor([5]), tensor([4]))


# test returning noncontiguous tensor from trace
def test_slice():
    @trace
    def f(x):
        return x[:, 1::2]

    x = F.arange(8).reshape(2, 4)
    f(x)
    y = f(x)
    np.testing.assert_array_equal(y.numpy(), x.numpy()[:, 1::2])
    y + y  # arithmetic on the noncontiguous result must still work


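# traced random ops (uniform/normal) must expose a usable output shape under
# both symbolic_shape settings, whether it comes back as a tuple or a tensor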
@pytest.mark.parametrize("shape_mode", [False, True])
def test_random(shape_mode):
    def run_test(op):
        @trace(symbolic=True, symbolic_shape=shape_mode)
        def f():
            out = op(size=[10, 10])
            out_shape = out.shape
            assert out_shape is not None
            if not isinstance(out_shape, tuple):
                assert out.shape.numpy() is not None
            return out

        for _ in range(3):
            f()

    run_test(uniform)
    run_test(normal)


@pytest.mark.parametrize("shape_mode", [False, True])
def test_trace_advance_indexing(shape_mode):
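    # each indexing pattern is evaluated three ways: on plain numpy arrays,
    # eagerly on tensors, and under trace; all three results must agree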
    funcs = [
        lambda x, i: x[i],
        lambda x, i, j: x[i, j],
        lambda x, i, j: x[i, :, j, ...],
        lambda x, start, end: x[start:end],
        lambda x, start, end: x[:, 0, start:end, ..., 1],
        lambda x, vec: x[vec],
        lambda x, vec: x[vec, ..., 0, 1:3],
        lambda x, vec: x[vec, vec[0], vec[1]],
        # lambda x, i, start, end, vec: x[i, ..., :, vec, start:end],  # FIXME
        lambda x, mask: x[mask],
    ]

    inputs = {
        "x": np.random.randn(5, 5, 5, 5, 5).astype("float32"),
        "i": 4,
        "j": 2,
        "start": 1,
        "end": 3,
        "vec": [1, 2, 3],
        "mask": np.random.randn(5, 5, 5, 5, 5) >= 0,
    }
    for f in funcs:
        sig = inspect.signature(f)
        param_names = list(sig.parameters.keys())
        params = {}
        params_np = {}
        f_traced = trace(f, symbolic=False, symbolic_shape=shape_mode)
        for name in param_names:
            params[name] = tensor(inputs[name])
            params_np[name] = inputs[name]
        expected = f(**params_np)
        result_imperative = f(**params)
        np.testing.assert_equal(expected, result_imperative.numpy())
        for _ in range(3):
            result_trace = f_traced(**params)
            np.testing.assert_equal(expected, result_trace.numpy())


@pytest.mark.require_ngpu(1)  # nvrtc backend
def test_trace_jit_config():
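    # every combination of the jit fuse flags must be reflected verbatim in the
    # compiled graph's jit_config (None/False/True map to 0/1/2)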
    def run(fuse_dimshuffle, fuse_reduce):
        config = GraphOptimizationConfig()
        config.jit_fuse_dimshuffle = fuse_dimshuffle
        config.jit_fuse_reduce = fuse_reduce

        # set opt_level = 1 to avoid fusing dimshuffle and reduce at the same time
        @trace(opt_level=1, graph_opt_config=config)
        def func(x):
            return x + 1

        x = tensor(2)
        y = func(x)
        func._compile()

        options = func._graph.options
        mapping = {None: 0, False: 1, True: 2}
        assert options.graph_opt.jit == 0
        assert options.graph_opt.jit_config.fuse_dimshuffle == mapping[fuse_dimshuffle]
        assert options.graph_opt.jit_config.fuse_reduce == mapping[fuse_reduce]

    for fuse_dimshuffle in [None, False, True]:
        for fuse_reduce in [None, False, True]:
            run(fuse_dimshuffle, fuse_reduce)