test_tracing.py 16.8 KB
Newer Older
1 2 3
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
4
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 6 7 8
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
import inspect
M
Megvii Engine Team 已提交
10
import io
11
import itertools
12
from tempfile import mkstemp
M
Megvii Engine Team 已提交
13

M
Megvii Engine Team 已提交
14
import numpy as np
15
import pytest
M
Megvii Engine Team 已提交
16

17
import megengine.core.tensor.megbrain_graph as G
18
import megengine.functional as F
19
import megengine.optimizer as optim
20
import megengine.utils.comp_graph_tools as cgtools
21 22
from megengine import Parameter, tensor
from megengine.autodiff import GradManager
23
from megengine.core._trace_option import set_symbolic_shape
M
Megvii Engine Team 已提交
24
from megengine.core.ops import builtin as ops
25
from megengine.core.ops.builtin import Elemwise
26
from megengine.core.tensor.utils import isscalar
27
from megengine.functional import exp, log
28
from megengine.jit import GraphOptimizationConfig, exclude_from_trace, trace
29
from megengine.module import Module
30
from megengine.random import normal, uniform
31
from megengine.utils.naming import AutoNaming
M
Megvii Engine Team 已提交
32 33


34 35 36 37 38 39 40 41 42 43 44 45
@pytest.mark.parametrize("trace_mode", [False, True])
@pytest.mark.parametrize("return_mode", ["Value", "Tuple", "List", "Dict"])
def test_trace(trace_mode, return_mode):
    """Check that a traced negation yields the same value on repeated calls.

    The traced function wraps its result as a bare value, a tuple, a list
    or a dict depending on ``return_mode``.
    """

    @trace(symbolic=trace_mode)
    def f(x):
        negated = -x
        if return_mode == "Tuple":
            return (negated,)
        if return_mode == "List":
            return [negated]
        if return_mode == "Dict":
            return {"neg": negated}
        return negated

    def get_numpy(y):
        # Unwrap the container selected by return_mode before converting.
        if return_mode in ("Tuple", "List"):
            return y[0].numpy()
        if return_mode == "Dict":
            return y["neg"].numpy()
        return y.numpy()

    x = tensor([1])
    reference = get_numpy(f(x))

    for _ in range(3):
        np.testing.assert_equal(get_numpy(f(x)), reference)
M
Megvii Engine Team 已提交
60 61


62 63 64 65 66 67 68 69 70 71 72 73
def test_output_copy_trace():
    """Compare the losses of three training steps with and without symbolic tracing."""

    class Simple(Module):
        def __init__(self):
            super().__init__()
            self.a = Parameter([1.0], dtype=np.float32)

        def forward(self, x):
            scaled = x * self.a
            # will result into a copy of output in grad
            return F.exp(scaled)

    # Losses recorded per tracing mode, keyed by the `symbolic` flag.
    ys = {False: [], True: []}

    for symbolic in (False, True):
        net = Simple()
        gm = GradManager().attach(net.parameters())
        opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
        data = tensor(np.arange(4).reshape(2, 2), dtype="float32")

        @trace(symbolic=symbolic)
        def train_func(d):
            with gm:
                loss = net(d)
                gm.backward(loss)
                opt.step().clear_grad()
            return loss

        recorded = ys[symbolic]
        for _ in range(3):
            recorded.append(train_func(data).numpy())

    for step in range(3):
        np.testing.assert_equal(ys[False][step], ys[True][step])
96

M
Megvii Engine Team 已提交
97

98 99 100 101 102 103 104 105 106 107 108 109 110
@pytest.mark.parametrize("trace_mode", [False, True])
def test_tensor_detach(trace_mode):
    """Run a chain of ``detach()`` calls inside a traced function.

    The function is traced with ``symbolic=trace_mode`` so that each
    parametrized case exercises a different tracing mode. The test only
    checks that three consecutive calls evaluate without raising.
    """

    @trace(symbolic=trace_mode)
    def f(x):
        y = x.detach() ** 2
        z = y.detach() + 1
        return z.detach()

    x = tensor([1, 2, 3, 4])
    for _ in range(3):
        f(x).numpy()


111 112 113 114 115 116 117 118 119 120
@pytest.mark.parametrize("trace_mode", [False, True])
def test_exclude_from_trace(trace_mode):
    """Check that two consecutive calls agree when part of the body runs under ``exclude_from_trace``."""

    @trace(symbolic=trace_mode)
    def f(x):
        x = -x
        with exclude_from_trace():
            # `i` is the loop counter of the enclosing test, looked up at call time.
            if i % 2 != 0:
                x = -x
        return -x

    x = tensor([1])

    for i in range(3):
        first = f(x).numpy()
        np.testing.assert_equal(f(x).numpy(), first)
M
Megvii Engine Team 已提交
127 128


129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse(trace_mode):
    """Evaluate a traced topk-plus-zero-offset function three times at opt_level 2."""

    # explicitly declare opt_level as 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f(a, b):
        base = 0
        diff = b - a
        _, idx = F.topk(diff, 3)
        # internally, biased_idx will be idx as gopt will ignore the addition
        return base + idx

    a = tensor(np.ones((7, 2)), dtype=np.int32)
    b = tensor(2 * np.ones((7, 2)), dtype=np.float32)

    for _ in range(3):
        f(a, b).numpy()


@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse_in_grad(trace_mode):
    """Run three traced SGD steps on a row-norm loss at opt_level 2."""
    w = Parameter(np.ones([4, 6]), dtype="float32")

    gm = GradManager().attach(w)
    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)

    # explicitly declare opt_level as 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f():
        with gm:
            row_norms = F.sum(w ** 2, axis=1) ** 0.5
            loss = row_norms.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    for _ in range(3):
        f().numpy()


M
Megvii Engine Team 已提交
171 172 173 174 175 176
def test_print_in_trace():
    """Check that a value read via ``numpy()`` inside a traced function is reproduced on replay."""
    for symbolic in [False]:  # cannot read value in symbolic mode

        @trace(symbolic=symbolic)
        def f(x):
            nonlocal buf
            negated = -x
            # Side channel: stash the intermediate value for the outer test.
            buf = negated.numpy()
            return -negated

        buf = None
        x = tensor([1])

        for _ in range(3):
            first_out = f(x).numpy()
            first_buf = buf
            # Reset so the second call has to fill the buffer again.
            buf = None
            np.testing.assert_equal(f(x).numpy(), first_out)
            np.testing.assert_equal(first_buf, buf)
M
Megvii Engine Team 已提交
191 192 193


def test_dump():
    """Dump a traced addition and check the dump info and the reloaded graph's result."""

    @trace(symbolic=True, capture_as_const=True)
    def f(a, b):
        return a + b

    # prevent from remaining scope from exception test
    AutoNaming.clear()
    a, b = tensor([2]), tensor([4])
    expected = f(a, b).numpy()

    for _ in range(3):
        np.testing.assert_equal(f(a, b).numpy(), expected)

    buffer = io.BytesIO()
    dump_info = f.dump(buffer)
    assert dump_info.nr_opr == 3
    np.testing.assert_equal(dump_info.inputs, ["arg_0", "arg_1"])
    np.testing.assert_equal(dump_info.outputs, ["ADD"])

    # Rewind so the inference helper reads the dump from its beginning.
    buffer.seek(0)
    infer_cg = cgtools.GraphInference(buffer)
    outputs = infer_cg.run(a, b)
    result = list(outputs.values())[0]
    np.testing.assert_equal(result[0], expected)


def test_capture_dump():
    """Dump a traced function that multiplies by a closed-over tensor and run the reloaded graph."""
    a = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * a

    x = tensor([3])
    expected = f(x).numpy()

    for _ in range(3):
        np.testing.assert_equal(f(x).numpy(), expected)

    buffer = io.BytesIO()
    f.dump(buffer)
    # Rewind so the inference helper reads the dump from its beginning.
    buffer.seek(0)
    infer_cg = cgtools.GraphInference(buffer)
    outputs = infer_cg.run(x)
    result = list(outputs.values())[0]
    np.testing.assert_equal(result[0], expected)


def test_dump_volatile():
    """Check that a closed-over tensor is dumped as an ``ImmutableTensor`` operator."""
    p = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * p

    x = tensor([3])
    expected = f(x).numpy()

    for _ in range(3):
        np.testing.assert_equal(f(x).numpy(), expected)

    buffer = io.BytesIO()
    f.dump(buffer, optimize_for_inference=False)
    buffer.seek(0)
    # The dumped graph is expected to have exactly one output variable.
    [out] = G.load_graph(buffer).output_vars_list
    second_input = cgtools.get_owner_opr_inputs(out)[1]
    assert cgtools.get_owner_opr_type(second_input) == "ImmutableTensor"
260 261


262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
def test_dump_backward_graph():
    """Dump a traced function that returns a product and its gradient, then replay it."""
    x0 = tensor(np.random.randn(3, 4))
    x1 = tensor(np.random.randn(3, 4))

    gm = GradManager().attach(x0)

    @trace(symbolic=True, capture_as_const=True)
    def f(x0, x1):
        with gm:
            y = x0 * x1
            gm.backward(y, F.ones_like(y))
            dx0 = x0.grad
        return y, dx0

    product, grad = f(x0, x1)
    # The test asserts that the gradient w.r.t. x0 equals x1.
    np.testing.assert_equal(grad.numpy(), x1)

    buffer = io.BytesIO()
    f.dump(buffer, optimize_for_inference=False)
    buffer.seek(0)

    infer_cg = cgtools.GraphInference(buffer)
    results = list(infer_cg.run(x0, x1).values())

    np.testing.assert_equal(results[0], product)
    np.testing.assert_equal(results[1], grad)


290 291 292 293 294 295 296 297 298 299
def test_dump_with_testcase():
    """Dump a traced ``exp`` with an ``input_data`` spec; only checks that dumping does not raise."""

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    f(tensor(1.0))
    buffer = io.BytesIO()
    f.dump(buffer, input_data=["#rand(0, 255, 1)"])


300 301 302 303 304
@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_profiler(trace_mode):
    """Check that a function traced with ``profiling=True`` reports a "profiler" entry."""

    @trace(symbolic=trace_mode, profiling=True)
    def f(x):
        return -x

    x = tensor([1])
    f(x).numpy()

    f(x)
    f(x)  # XXX: has to run twice

    profile = f.get_profile()
    assert profile.get("profiler")
314 315


316
def test_goptions():
    """Compare ``y / y`` at zero under opt_level 0 and opt_level 1.

    The test asserts a non-finite result at opt_level 0 and exactly 1.0
    at opt_level 1.
    """

    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        # directly return x / x will not trigger gopt
        # since there's no way to tell the two x are the same
        doubled = 2.0 * x
        return doubled / doubled

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        doubled = 2.0 * x
        return doubled / doubled

    zero = tensor(0.0)
    assert not np.isfinite(f(zero).numpy())
    np.testing.assert_equal(g(zero).numpy().item(), 1.0)
332 333 334 335 336 337 338 339 340 341 342


def test_goptions_log_sum_exp():
    """Compare ``log(exp(x) + exp(y))`` for a large x under opt_level 0 and opt_level 1.

    The test asserts a non-finite result at opt_level 0 and a result
    almost equal to x at opt_level 1.
    """

    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x, y):
        return log(exp(x) + exp(y))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x, y):
        return log(exp(x) + exp(y))

    val = 1.0e4
    large, zero = tensor(val), tensor(0.0)
    assert not np.isfinite(f(large, zero).numpy())
    np.testing.assert_almost_equal(g(large, zero), val)


def test_goptions_log_exp():
    """Check that opt_level 1 dumps two fewer operators than opt_level 0 for ``log(exp(x))``.

    Both traced functions are dumped to the same temporary file, one after
    the other, and the operator sequences of the reloaded graphs are
    compared by length.
    """
    import os

    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        return log(exp(x))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        return log(exp(x))

    # mkstemp hands back an open descriptor; only the path is used below,
    # so close the descriptor right away instead of leaking it.
    fd, out = mkstemp()
    os.close(fd)
    try:
        f(tensor(1.0))
        f.dump(out, optimize_for_inference=False)
        outputs = G.load_graph(out).output_vars_list
        oprs_1 = cgtools.get_oprs_seq(outputs)

        g(tensor(1.0))
        g.dump(out, optimize_for_inference=False)
        outputs = G.load_graph(out).output_vars_list
        oprs_2 = cgtools.get_oprs_seq(outputs)
    finally:
        # mkstemp never deletes the file itself.
        os.remove(out)

    assert len(oprs_1) - len(oprs_2) == 2


def test_optimize_for_inference():
    """Check that dumping with ``enable_io16xc32=True`` gives the output operator a float16 input."""
    import os

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    # mkstemp hands back an open descriptor; only the path is used below,
    # so close the descriptor right away instead of leaking it.
    fd, out = mkstemp()
    os.close(fd)
    try:
        f(tensor(5.0))
        f.dump(out, enable_io16xc32=True)

        res = G.load_graph(out)
        computing_input = res.output_vars_list[0].owner.inputs[0]
        assert computing_input.dtype == np.float16
    finally:
        # mkstemp never deletes the file itself.
        os.remove(out)
385 386


387 388 389
def test_optimize_for_inference_broadcast():
    """Dump a traced ``_broadcast`` of a closed-over tensor; only checks that nothing raises."""
    a = tensor(np.ones(1, dtype=np.float32))

    @trace(capture_as_const=True, symbolic_shape=True)
    def f():
        target_shape = tensor([1, 10], dtype=np.int32)
        return a._broadcast(target_shape)

    f()
    buffer = io.BytesIO()
    f.dump(buffer)


398 399 400 401 402
def test_trace_cvt_bool():
    """Check that comparing a traced shape element with 0 evaluates to False for a length-1 tensor."""
    x = tensor([0], dtype=np.int32)

    @trace(symbolic=True)
    def f(x):
        first_dim = x.shape[0]
        assert isscalar(first_dim)
        return first_dim == 0

    for _ in range(3):
        np.testing.assert_equal(f(x).numpy(), False)
410 411


412 413 414 415 416
@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_reshape(trace_mode):
    """Call a traced reshape with inputs whose leading dimension differs between calls."""
    x1 = tensor(np.random.randn(2, 10, 10))
    x2 = tensor(np.random.randn(4, 10, 10))
    x3 = tensor(np.random.randn(8, 10, 10))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        return x.reshape(x.shape[0], 100)

    for sample in (x1, x2, x3):
        f(sample)
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452


def test_trace_topk():
    """Check inside a symbolic trace that the first ``topk`` output has shape ``[3]``."""
    x = tensor([5, 2, 7, 1, 0, 3, 2])

    @trace(symbolic=True)
    def f(x):
        top = F.topk(x, 3)
        np.testing.assert_equal(top[0].shape.numpy(), np.array([3]))
        return top

    for _ in range(3):
        f(x)


def test_trace_warp_perspective():
    """Check inside a symbolic trace that ``warp_perspective`` yields shape ``[1, 1, 2, 2]``."""
    inp_shape = (1, 1, 4, 4)
    M_shape = (1, 3, 3)
    x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
    matrix_rows = [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]]
    M = tensor(np.array(matrix_rows, dtype=np.float32).reshape(M_shape))

    @trace(symbolic=True)
    def f(x, M):
        warped = F.vision.warp_perspective(x, M, (2, 2))
        np.testing.assert_equal(warped.shape.numpy(), np.array([1, 1, 2, 2]))
        return warped

    for _ in range(3):
        f(x, M)
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490


def test_raise_on_trace():
    """Raise once from inside a traced function and check the other 99 calls still compute correctly."""
    step_count = 0
    catch_count = 0
    bad_step = 10

    class CatchMe(Exception):
        pass

    a = tensor([1, 2, 3, 4])
    b = tensor([5, 6, 7, 8])
    c = tensor([9, 0, 1, 2])

    @trace
    def add_abc(a, b, c):
        partial_sum = a + b
        total = partial_sum + c
        # step_count is read from the enclosing test at call time.
        if step_count == bad_step:
            raise CatchMe("catch me")
        return total

    for _ in range(100):
        try:
            d = add_abc(a, b, c)
        except CatchMe:
            catch_count += 1
        else:
            np.testing.assert_equal(d.numpy(), (a + b + c).numpy())
        step_count += 1

    assert catch_count == 1
491 492


493 494 495 496 497
@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_broadcast(trace_mode):
    """Call a traced ``broadcast_to`` with three inputs of different shapes."""
    x1 = tensor(np.random.randn(3, 1, 1))
    x2 = tensor(np.random.randn(1, 4, 1))
    x3 = tensor(np.random.randn(1, 1, 5))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        return F.broadcast_to(x, (3, 4, 5))

    for sample in (x1, x2, x3):
        f(sample)
507 508 509 510 511 512 513 514 515 516 517 518 519 520


def test_trace_nms():
    """Call a traced NMS with 10, 20 and 30 random boxes; only checks that nothing raises."""

    def make_inputs(n):
        # Columns 0-1 are drawn from [0, 100), columns 2-3 from [100, 200).
        boxes = np.zeros((n, 4))
        boxes[:, :2] = np.random.rand(n, 2) * 100
        boxes[:, 2:] = np.random.rand(n, 2) * 100 + 100

        scores = np.random.rand(n)

        return tensor(boxes), tensor(scores)

    @trace(symbolic=False)
    def f(boxes, scores):
        # with tracing, max_output must be specified
        kept = F.vision.nms(boxes, scores=scores, iou_thresh=0.5, max_output=20)
        # without tracing, max output can be inferred inside nms
        with exclude_from_trace():
            F.vision.nms(boxes, scores=scores, iou_thresh=0.5)
        return kept

    for box_count in (10, 20, 30):
        f(*make_inputs(box_count))
531 532 533 534 535 536 537 538 539 540 541 542 543 544


def test_trace_valid_broadcast():
    """Call a traced ``broadcast_to`` whose target shape is passed as a tuple of tensors."""
    x1 = tensor(np.random.randn(1, 1))
    x2 = tensor(np.random.randn(1, 2))
    shape = (tensor([2]), tensor([2]))

    @trace(symbolic=False)
    def f(x, shape):
        return F.broadcast_to(x, shape)

    for sample in (x1, x2):
        f(sample, shape)
545 546


547 548
@pytest.mark.parametrize("trace_mode", [False, True])
def test_clip(trace_mode):
    """Call a traced ``F.clip`` three times each with bounds (0, 1) and then (5, 4)."""
    x = tensor(np.random.randn(10, 10))

    @trace(symbolic=trace_mode)
    def f(x, lower, upper):
        return F.clip(x, lower, upper)

    # The second pair has lower > upper.
    for low, high in ((0, 1), (5, 4)):
        for _ in range(3):
            f(x, tensor([low]), tensor([high]))

562 563 564 565 566 567 568 569 570 571 572 573

# test returning noncontiguous tensor from trace
def test_slice():
    """Check a strided slice returned from a traced function against the numpy slice."""

    @trace
    def f(x):
        return x[:, 1::2]

    x = F.arange(8).reshape(2, 4)
    f(x)
    sliced = f(x)
    np.testing.assert_array_equal(sliced.numpy(), x.numpy()[:, 1::2])
    # The sum is discarded; the line only checks that adding the result to itself does not raise.
    sliced + sliced
574 575


576 577
@pytest.mark.parametrize("shape_mode", [False, True])
def test_random(shape_mode):
    """Check that ``uniform`` and ``normal`` outputs expose a shape inside a symbolic trace."""

    def run_test(op):
        @trace(symbolic=True, symbolic_shape=shape_mode)
        def f():
            out = op(size=[10, 10])
            out_shape = out.shape
            assert out_shape is not None
            # A non-tuple shape must additionally be readable via numpy().
            if not isinstance(out_shape, tuple):
                assert out.shape.numpy() is not None
            return out

        for _ in range(3):
            f()

    for random_op in (uniform, normal):
        run_test(random_op)
593 594 595 596 597 598


@pytest.mark.parametrize("shape_mode", [False, True])
def test_trace_advance_indexing(shape_mode):
    """Compare indexing expressions across numpy, untraced tensors and traced tensors.

    Each lambda's parameter names select its arguments from ``inputs``.
    The numpy result is the reference for the untraced call and for three
    consecutive traced calls.
    """
    funcs = [
        lambda x, i: x[i],
        lambda x, i, j: x[i, j],
        lambda x, i, j: x[i, :, j, ...],
        lambda x, start, end: x[start:end],
        lambda x, start, end: x[:, 0, start:end, ..., 1],
        lambda x, vec: x[vec],
        lambda x, vec: x[vec, ..., 0, 1:3],
        lambda x, vec: x[vec, vec[0], vec[1]],
        # lambda x, i, start, end, vec: x[i, ..., :, vec, start:end],  # FIXME
        lambda x, mask: x[mask],
    ]

    inputs = {
        "x": np.random.randn(5, 5, 5, 5, 5).astype("float32"),
        "i": 4,
        "j": 2,
        "start": 1,
        "end": 3,
        "vec": [1, 2, 3],
        "mask": np.random.randn(5, 5, 5, 5, 5) >= 0,
    }
    for f in funcs:
        # Use the public Signature.parameters mapping to discover argument names.
        param_names = list(inspect.signature(f).parameters)
        f_traced = trace(f, symbolic=False, symbolic_shape=shape_mode)
        params_np = {name: inputs[name] for name in param_names}
        params = {name: tensor(value) for name, value in params_np.items()}

        expected = f(**params_np)
        result_imperative = f(**params)
        np.testing.assert_equal(expected, result_imperative.numpy())
        for _ in range(3):
            result_trace = f_traced(**params)
            np.testing.assert_equal(expected, result_trace.numpy())
634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660


@pytest.mark.require_ngpu(1)  # nvrtc backend
def test_trace_jit_config():
    """Check that ``GraphOptimizationConfig`` JIT flags show up in the compiled graph options.

    Every combination of None/False/True for the two flags is tried.
    """

    def run(fuse_dimshuffle, fuse_reduce):
        config = GraphOptimizationConfig()
        config.jit_fuse_dimshuffle = fuse_dimshuffle
        config.jit_fuse_reduce = fuse_reduce

        # set opt_level = 1 to avoid fusing dimshuffle and reduce at the same time
        @trace(opt_level=1, graph_opt_config=config)
        def func(x):
            return x + 1

        x = tensor(2)
        func(x)
        func._compile()

        graph_opt = func._graph.options.graph_opt
        # Expected option value for each Python-side flag setting.
        mapping = {None: 0, False: 1, True: 2}
        assert graph_opt.jit == 0
        assert graph_opt.jit_config.fuse_dimshuffle == mapping[fuse_dimshuffle]
        assert graph_opt.jit_config.fuse_reduce == mapping[fuse_reduce]

    tri_state = [None, False, True]
    for fuse_dimshuffle, fuse_reduce in itertools.product(tri_state, repeat=2):
        run(fuse_dimshuffle, fuse_reduce)