# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import inspect
import io
import itertools
from tempfile import mkstemp

import numpy as np
import pytest

import megengine.core.tensor.megbrain_graph as G
import megengine.functional as F
import megengine.optimizer as optim
import megengine.utils.comp_graph_tools as cgtools
from megengine import Parameter, tensor
from megengine.autodiff import GradManager
from megengine.core._trace_option import set_symbolic_shape
from megengine.core.ops import builtin as ops
from megengine.core.ops.builtin import Elemwise
from megengine.core.tensor.utils import isscalar
from megengine.functional import exp, log
from megengine.jit import GraphOptimizationConfig, exclude_from_trace, trace
from megengine.module import Module
from megengine.random import normal, uniform
from megengine.utils.naming import AutoNaming


@pytest.mark.parametrize("trace_mode", [False, True])
@pytest.mark.parametrize("return_mode", ["Value", "Tuple", "List", "Dict"])
def test_trace(trace_mode, return_mode):
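    # the traced function should keep returning the first call's result for every return-container type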
    @trace(symbolic=trace_mode)
    def f(x):
        if return_mode == "Tuple":
            return (-x,)
        elif return_mode == "List":
            return [-x]
        elif return_mode == "Dict":
            return {"neg": -x}
        else:
            return -x

    def get_numpy(y):
        if return_mode == "Tuple" or return_mode == "List":
            return y[0].numpy()
        elif return_mode == "Dict":
            return y["neg"].numpy()
        return y.numpy()

    x = tensor([1])
    y = get_numpy(f(x))

    for i in range(3):
        np.testing.assert_equal(get_numpy(f(x)), y)


def test_output_copy_trace():
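    # losses from the imperative (False) and symbolic (True) traces must match step by step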
    class Simple(Module):
        def __init__(self):
            super().__init__()
            self.a = Parameter([1.0], dtype=np.float32)

        def forward(self, x):
            x = x * self.a
            # results in a copy of the output in grad
            x = F.exp(x)
            return x

    ys = {False: [], True: []}

    for symbolic in [False, True]:
        net = Simple()
        gm = GradManager().attach(net.parameters())
        opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
        data = tensor(np.arange(4).reshape(2, 2), dtype="float32")

        @trace(symbolic=symbolic)
        def train_func(d):
            with gm:
                loss = net(d)
                gm.backward(loss)
                opt.step().clear_grad()
            return loss

        for i in range(3):
            y = train_func(data).numpy()
            ys[symbolic].append(y)

    for i in range(3):
        np.testing.assert_equal(ys[False][i], ys[True][i])


@pytest.mark.parametrize("trace_mode", [False, True])
def test_tensor_detach(trace_mode):
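    # detach() on inputs, intermediates and outputs must be handled by the trace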
    @trace(symbolic=trace_mode)
    def f(x):
        y = x.detach() ** 2
        z = y.detach() + 1
        return z.detach()

    x = tensor([1, 2, 3, 4])
    for _ in range(3):
        f(x).numpy()


@pytest.mark.parametrize("trace_mode", [False, True])
def test_exclude_from_trace(trace_mode):
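    # `i` refers to the loop variable below; the block under exclude_from_trace
    # runs eagerly on every call instead of being baked into the trace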
    @trace(symbolic=trace_mode)
    def f(x):
        x = -x
        with exclude_from_trace():
            if i % 2:
                x = -x
        x = -x
        return x

    x = tensor([1])

    for i in range(3):
        y = f(x).numpy()
        np.testing.assert_equal(f(x).numpy(), y)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse(trace_mode):
    # explicitly set opt_level to 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f(a, b):
        base = 0
        c = b - a
        _, idx = F.topk(c, 3)
        # internally, biased_idx is just idx, since gopt eliminates the addition of 0
        biased_idx = base + idx
        return biased_idx

    a = tensor(np.ones((7, 2)), dtype=np.int32)
    b = tensor(2 * np.ones((7, 2)), dtype=np.float32)

    for i in range(3):
        y = f(a, b)
        y.numpy()


@pytest.mark.parametrize("trace_mode", [False, True])
def test_elemwise_fuse_in_grad(trace_mode):
    w = Parameter(np.ones([4, 6]), dtype="float32")

    gm = GradManager().attach(w)
    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)

    # explicitly set opt_level to 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f():
        with gm:
            wm = F.sum(w ** 2, axis=1) ** 0.5
            loss = wm.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    for i in range(3):
        y = f()
        y.numpy()


def test_print_in_trace():
    for symbolic in [False]:  # cannot read value in symbolic mode
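        # with symbolic=False the Python body re-executes on each call, so buf is refilled every time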

        @trace(symbolic=symbolic)
        def f(x):
            nonlocal buf
            x = -x
            buf = x.numpy()
            x = -x
            return x

        buf = None
        x = tensor([1])

        for i in range(3):
            y = f(x).numpy()
            z = buf
            buf = None
            np.testing.assert_equal(f(x).numpy(), y)
            np.testing.assert_equal(z, buf)


def test_dump():
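    # round-trip check: trace -> dump -> GraphInference reload reproduces the traced result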
    @trace(symbolic=True, capture_as_const=True)
    def f(a, b):
        return a + b

    # clear any naming scope left over from the exception test
    AutoNaming.clear()
    a = tensor([2])
    b = tensor([4])
    y = f(a, b).numpy()

    for i in range(3):
        np.testing.assert_equal(f(a, b).numpy(), y)

    file = io.BytesIO()
    dump_info = f.dump(file)
    assert dump_info.nr_opr == 3
    np.testing.assert_equal(dump_info.inputs, ["arg_0", "arg_1"])
    np.testing.assert_equal(dump_info.outputs, ["ADD"])
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(a, b)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_capture_dump():
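    # a tensor captured from the enclosing scope is baked into the dumped graph as a constant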
    a = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * a

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file)
    file.seek(0)
    infer_cg = cgtools.GraphInference(file)
    result = list((infer_cg.run(x)).values())[0]
    np.testing.assert_equal(result[0], y)


def test_dump_volatile():
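    # the captured tensor p should appear in the dump as an ImmutableTensor input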
    p = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * p

    x = tensor([3])
    y = f(x).numpy()

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), y)

    file = io.BytesIO()
    f.dump(file, optimize_for_inference=False)
    file.seek(0)
    (out,) = G.load_graph(file).output_vars_list
    assert (
        cgtools.get_owner_opr_type(cgtools.get_owner_opr_inputs(out)[1])
        == "ImmutableTensor"
    )


def test_dump_backward_graph():
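    # both the forward output and the captured gradient must survive dump and reload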
    x0 = tensor(np.random.randn(3, 4))
    x1 = tensor(np.random.randn(3, 4))

    gm = GradManager().attach(x0)

    @trace(symbolic=True, capture_as_const=True)
    def f(x0, x1):
        with gm:
            y = x0 * x1
            gm.backward(y, F.ones_like(y))
            dx0 = x0.grad
        return y, dx0

    y, dx0 = f(x0, x1)
    np.testing.assert_equal(dx0.numpy(), x1)

    file = io.BytesIO()
    f.dump(file, optimize_for_inference=False)
    file.seek(0)

    infer_cg = cgtools.GraphInference(file)
    results = list((infer_cg.run(x0, x1)).values())

    np.testing.assert_equal(results[0], y)
    np.testing.assert_equal(results[1], dx0)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_profiler(trace_mode):
    @trace(symbolic=trace_mode, profiling=True)
    def f(x):
        return -x

    x = tensor([1])
    y = f(x).numpy()

    f(x)
    f(x)  # XXX: has to run twice

    out = f.get_profile()
    assert out.get("profiler")


def test_goptions():
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        # returning x / x directly will not trigger gopt,
        # since there is no way to tell that the two x's are the same
        y = 2.0 * x
        return y / y

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        y = 2.0 * x
        return y / y

    d = tensor(0.0)
    assert not np.isfinite(f(d).numpy())
    np.testing.assert_equal(g(d).numpy().item(), 1.0)


def test_goptions_log_sum_exp():
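    # at opt_level=1 gopt rewrites log(exp(x) + exp(y)) into a numerically stable log-sum-exp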
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x, y):
        return log(exp(x) + exp(y))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x, y):
        return log(exp(x) + exp(y))

    val = 1.0e4
    d = tensor(val)
    o = tensor(0.0)
    assert not np.isfinite(f(d, o).numpy())
    np.testing.assert_almost_equal(g(d, o), val)


def test_goptions_log_exp():
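    # gopt should fold log(exp(x)) away, shaving two oprs off the sequence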
    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        return log(exp(x))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        return log(exp(x))

    f(tensor(1.0))
    _, out = mkstemp()
    f.dump(out, optimize_for_inference=False)
    outputs = G.load_graph(out).output_vars_list
    oprs_1 = cgtools.get_oprs_seq(outputs)

    g(tensor(1.0))
    g.dump(out, optimize_for_inference=False)
    outputs = G.load_graph(out).output_vars_list
    oprs_2 = cgtools.get_oprs_seq(outputs)

    assert len(oprs_1) - len(oprs_2) == 2


def test_optimize_for_inference():
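    # io16xc32: inter-opr I/O is stored in float16 while computation stays in float32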
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    _, out = mkstemp()
    f(tensor(5.0))
    f.dump(out, enable_io16xc32=True)

    res = G.load_graph(out)
    computing_input = res.output_vars_list[0].owner.inputs[0]
    assert computing_input.dtype == np.float16


def test_optimize_for_inference_broadcast():
    a = tensor(np.ones(1, dtype=np.float32))

    @trace(capture_as_const=True, symbolic_shape=True)
    def f():
        return a._broadcast(tensor([1, 10], dtype=np.int32))

    f()
    f.dump(io.BytesIO())


def test_trace_cvt_bool():
    x = tensor([0], dtype=np.int32)

    @trace(symbolic=True)
    def f(x):
        a = x.shape
        b = a[0]
        assert isscalar(b)
        return b == 0

    for i in range(3):
        np.testing.assert_equal(f(x).numpy(), False)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_reshape(trace_mode):
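    # a single trace must accept inputs whose leading dimension varies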
    x1 = tensor(np.random.randn(2, 10, 10))
    x2 = tensor(np.random.randn(4, 10, 10))
    x3 = tensor(np.random.randn(8, 10, 10))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = x.reshape(x.shape[0], 100)
        return y

    f(x1)
    f(x2)
    f(x3)


def test_trace_topk():
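    # the static shape of the traced topk output must be readable inside the traced function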
    x = tensor([5, 2, 7, 1, 0, 3, 2])

    @trace(symbolic=True)
    def f(x):
        y = F.topk(x, 3)
        np.testing.assert_equal(y[0].shape.numpy(), np.array([3,]))
        return y

    for i in range(3):
        f(x)


def test_trace_warp_perspective():
    inp_shape = (1, 1, 4, 4)
    x = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
    M_shape = (1, 3, 3)
    M = tensor(
        np.array(
            [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ).reshape(M_shape)
    )

    @trace(symbolic=True)
    def f(x, M):
        out = F.vision.warp_perspective(x, M, (2, 2))
        np.testing.assert_equal(out.shape.numpy(), np.array([1, 1, 2, 2]))
        return out

    for i in range(3):
        f(x, M)


def test_raise_on_trace():
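    # an exception escaping a traced call must not corrupt the trace for later calls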
    step_count = 0
    catch_count = 0
    bad_step = 10

    class CatchMe(Exception):
        pass

    a = tensor([1, 2, 3, 4])
    b = tensor([5, 6, 7, 8])
    c = tensor([9, 0, 1, 2])

    @trace
    def add_abc(a, b, c):
        ps = a + b
        result = ps + c
        if step_count == bad_step:
            raise CatchMe("catch me")
        return result

    for i in range(100):
        try:
            d = add_abc(a, b, c)
        except CatchMe as e:
            catch_count += 1
        else:
            np.testing.assert_equal(d.numpy(), (a + b + c).numpy())
        step_count += 1

    assert catch_count == 1


@pytest.mark.parametrize("trace_mode", [False, True])
def test_trace_broadcast(trace_mode):
    x1 = tensor(np.random.randn(3, 1, 1))
    x2 = tensor(np.random.randn(1, 4, 1))
    x3 = tensor(np.random.randn(1, 1, 5))

    @trace(symbolic=trace_mode, capture_as_const=True)
    def f(x):
        y = F.broadcast_to(x, (3, 4, 5))
        return y

    f(x1)
    f(x2)
    f(x3)


def test_trace_nms():
    def make_inputs(n):
        boxes = np.zeros((n, 4))
        boxes[:, :2] = np.random.rand(n, 2) * 100
        boxes[:, 2:] = np.random.rand(n, 2) * 100 + 100

        scores = np.random.rand(n)

        return tensor(boxes), tensor(scores)

    @trace(symbolic=False)
    def f(boxes, scores):
        # with tracing, max_output must be specified
        results = F.vision.nms(boxes, scores=scores, iou_thresh=0.5, max_output=20)
        # without tracing, max_output can be inferred inside nms
        with exclude_from_trace():
            _ = F.vision.nms(boxes, scores=scores, iou_thresh=0.5)
        return results

    f(*make_inputs(10))
    f(*make_inputs(20))
    f(*make_inputs(30))


def test_trace_valid_broadcast():
    x1 = tensor(np.random.randn(1, 1))
    x2 = tensor(np.random.randn(1, 2))
    shape = (tensor([2]), tensor([2]))

    @trace(symbolic=False)
    def f(x, shape):
        y = F.broadcast_to(x, shape)
        return y

    f(x1, shape)
    f(x2, shape)


@pytest.mark.parametrize("trace_mode", [False, True])
def test_clip(trace_mode):
    x = tensor(np.random.randn(10, 10))

    @trace(symbolic=trace_mode)
    def f(x, lower, upper):
        y = F.clip(x, lower, upper)
        return y

    for i in range(3):
        f(x, tensor([0]), tensor([1]))

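    # a degenerate range (lower > upper) must not break the cached trace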
    for i in range(3):
        f(x, tensor([5]), tensor([4]))


# test returning a noncontiguous tensor from trace
def test_slice():
    @trace
    def f(x):
        return x[:, 1::2]

    x = F.arange(8).reshape(2, 4)
    f(x)
    y = f(x)
    np.testing.assert_array_equal(y.numpy(), x.numpy()[:, 1::2])
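    # the noncontiguous result must remain usable in follow-up computation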
    y + y


@pytest.mark.parametrize("shape_mode", [False, True])
def test_random(shape_mode):
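    # under symbolic_shape=True, out.shape is a symbolic var rather than a tuple of ints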
    def run_test(op):
        @trace(symbolic=True, symbolic_shape=shape_mode)
        def f():
            out = op(size=[10, 10])
            out_shape = out.shape
            assert out_shape is not None
            if not isinstance(out_shape, tuple):
                assert out.shape.numpy() is not None
            return out

        for _ in range(3):
            f()

    run_test(uniform)
    run_test(normal)


@pytest.mark.parametrize("shape_mode", [False, True])
def test_trace_advance_indexing(shape_mode):
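    # every indexing pattern is checked against NumPy, both eagerly and under trace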
    funcs = [
        lambda x, i: x[i],
        lambda x, i, j: x[i, j],
        lambda x, i, j: x[i, :, j, ...],
        lambda x, start, end: x[start:end],
        lambda x, start, end: x[:, 0, start:end, ..., 1],
        lambda x, vec: x[vec],
        lambda x, vec: x[vec, ..., 0, 1:3],
        lambda x, vec: x[vec, vec[0], vec[1]],
        # lambda x, i, start, end, vec: x[i, ..., :, vec, start:end],  # FIXME
        lambda x, mask: x[mask],
    ]

    inputs = {
        "x": np.random.randn(5, 5, 5, 5, 5).astype("float32"),
        "i": 4,
        "j": 2,
        "start": 1,
        "end": 3,
        "vec": [1, 2, 3],
        "mask": np.random.randn(5, 5, 5, 5, 5) >= 0,
    }
    for f in funcs:
        sig = inspect.signature(f)
        param_names = list(sig.parameters.keys())
        params = {}
        params_np = {}
        f_traced = trace(f, symbolic=False, symbolic_shape=shape_mode)
        for name in param_names:
            params[name] = tensor(inputs[name])
            params_np[name] = inputs[name]
        expected = f(**params_np)
        result_imperative = f(**params)
        np.testing.assert_equal(expected, result_imperative.numpy())
        for _ in range(3):
            result_trace = f_traced(**params)
            np.testing.assert_equal(expected, result_trace.numpy())


@pytest.mark.require_ngpu(1)  # nvrtc backend
def test_trace_jit_config():
    def run(fuse_dimshuffle, fuse_reduce):
        config = GraphOptimizationConfig()
        config.jit_fuse_dimshuffle = fuse_dimshuffle
        config.jit_fuse_reduce = fuse_reduce

        # set opt_level = 1 to avoid fusing dimshuffle and reduce at the same time
        @trace(opt_level=1, graph_opt_config=config)
        def func(x):
            return x + 1

        x = tensor(2)
        y = func(x)
        func._compile()

        options = func._graph.options
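        # tri-state jit options serialize as 0 (unset), 1 (disabled), 2 (enabled)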
        mapping = {None: 0, False: 1, True: 2}
        assert options.graph_opt.jit == 0
        assert options.graph_opt.jit_config.fuse_dimshuffle == mapping[fuse_dimshuffle]
        assert options.graph_opt.jit_config.fuse_reduce == mapping[fuse_reduce]

    for fuse_dimshuffle in [None, False, True]:
        for fuse_reduce in [None, False, True]:
            run(fuse_dimshuffle, fuse_reduce)