diff --git a/imperative/python/megengine/autodiff/grad_manager.py b/imperative/python/megengine/autodiff/grad_manager.py index f94abe8fc1f28be8c053f8d438d576380a265826..6a9c65a342ab00f4bda0e1fb1115e28589d57afb 100644 --- a/imperative/python/megengine/autodiff/grad_manager.py +++ b/imperative/python/megengine/autodiff/grad_manager.py @@ -3,6 +3,7 @@ from collections import defaultdict from contextlib import contextmanager from typing import Callable +from ..core._imperative_rt.core2 import pop_scope, push_scope from ..core.autodiff.grad import Grad from ..logger import get_logger from ..tensor import Tensor @@ -239,6 +240,7 @@ class GradManager: :param y: tensor or list of tensors :param dy: tensor or list of tensors. Defaults to 1 if y is scalar """ + push_scope("backward") from ..functional import ones_like global backwarding_grad_manager @@ -280,6 +282,7 @@ class GradManager: finally: self.release() backwarding_grad_manager = cache + pop_scope("backward") def record(self): r""" diff --git a/imperative/python/megengine/core/__init__.py b/imperative/python/megengine/core/__init__.py index 5de7a67da757fde65b6d5e5f8af9294b424ae9b1..d8df582960d042dbccfa9d662a1e2be55c18a9cb 100644 --- a/imperative/python/megengine/core/__init__.py +++ b/imperative/python/megengine/core/__init__.py @@ -8,5 +8,17 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import os import sys +from contextlib import contextmanager +from ._imperative_rt.core2 import get_option, set_option from .tensor.megbrain_graph import Graph + + +@contextmanager +def option(key, value): + value = int(value) + old = get_option(key) + set_option(key, value) + yield + assert get_option(key) == value + set_option(key, old) diff --git a/imperative/python/megengine/module/module.py b/imperative/python/megengine/module/module.py index 964bbef68962c3ea256c5905450488cf9497c213..28dcd76064f8ac5af9f8320b60328af089abe22d 100644 --- a/imperative/python/megengine/module/module.py +++ b/imperative/python/megengine/module/module.py @@ -12,6 +12,7 @@ from typing import Any, Callable, Iterable, Optional, Set, Tuple, Union import numpy as np +from ..core._imperative_rt.core2 import pop_scope, push_scope from ..core.tensor.utils import make_shape_tuple from ..logger import get_logger from ..tensor import Parameter, Tensor @@ -78,6 +79,7 @@ class Module(metaclass=ABCMeta): self._forward_hooks = OrderedDict() self._modules = [] + self._name = "{anonymous}" @abstractmethod def forward(self, inputs): @@ -103,6 +105,7 @@ class Module(metaclass=ABCMeta): return HookHandler(self._forward_hooks, hook) def __call__(self, *inputs, **kwargs): + push_scope(self._name) for hook in self._forward_pre_hooks.values(): modified_inputs = hook(self, inputs) if modified_inputs is not None: @@ -116,6 +119,7 @@ class Module(metaclass=ABCMeta): modified_outputs = hook(self, inputs, outputs) if modified_outputs is not None: outputs = modified_outputs + pop_scope(self._name) return outputs def _flatten( @@ -571,6 +575,14 @@ class Module(metaclass=ABCMeta): return set(loaded), set(skipped) + def __getattribute__(self, name: str): + value = super().__getattribute__(name) + if name == "_name": + return value + if _is_module(value): + value._name = name + return value + def __setattr__(self, name: str, value): if _is_module(value): modules = self.__dict__.get("_modules") diff --git a/imperative/python/megengine/optimizer/optimizer.py b/imperative/python/megengine/optimizer/optimizer.py index 69ef5239c162c7278982125c306e91b203095885..49c39ae76b461c98fb6b26c8d51488435245ce34 100644 --- a/imperative/python/megengine/optimizer/optimizer.py +++ b/imperative/python/megengine/optimizer/optimizer.py @@ -15,6 +15,7 @@ from typing import Union import numpy as np +from ..core._imperative_rt.core2 import pop_scope, push_scope from ..core.tensor.utils import set_convert_inputs from ..tensor import Parameter, Tensor from ..utils.deprecation import deprecated @@ -155,7 +156,9 @@ class Optimizer(metaclass=ABCMeta): "but the ordering of parameters in sets will change between runs. " "Please use a list instead." ) + push_scope("step") self._updates(group) + pop_scope("step") # restore the globle state `_enable_convert_inputs` set_convert_inputs(backup) return self @@ -172,8 +175,10 @@ class Optimizer(metaclass=ABCMeta): Set the grad attribute to None for all parameters. """ for param_group in self.param_groups: + push_scope("clear_grad") for param in param_group["params"]: param.grad = None + pop_scope("clear_grad") def state_dict(self) -> Dict: r""" diff --git a/imperative/python/megengine/utils/profiler.py b/imperative/python/megengine/utils/profiler.py index 689e8c69dddb90830553f400c669086dadb29c69..ae70da26fd18abf31bc676a6d22af040a2bc07b8 100644 --- a/imperative/python/megengine/utils/profiler.py +++ b/imperative/python/megengine/utils/profiler.py @@ -6,159 +6,17 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -import base64 import json -import os -import re -from typing import Iterable, List, Optional +from contextlib import contextmanager +from typing import List -from ..core._imperative_rt import OperatorNodeConfig, ProfileEntry -from ..core._imperative_rt import ProfilerImpl as _Profiler -from ..core._imperative_rt.core2 import sync -from ..core._imperative_rt.ops import CollectiveComm - - -def _make_dict(**kwargs): - unused_keys = [] - for k, v in kwargs.items(): - if v is None: - unused_keys.append(k) - for k in unused_keys: - del kwargs[k] - return kwargs - - -def _print_opnode_config(config): - return _make_dict( - name=config.name, dtype=config.dtype, comp_node_arr=config.comp_node_arr, - ) - - -def _dump_chrome_timeline(entries: List[ProfileEntry], path: str): - pid = os.getpid() - trace_events = [] - - def append_event(**kwargs): - trace_events.append(_make_dict(**kwargs)) - - for id, entry in enumerate(entries): - op = entry.op - name = type(op).__name__ - host_begin, host_end = entry.host - device_list = entry.device_list - args = Profiler.fetch_attrs(op) - args["__id__"] = "[{}]".format(id) - cat = name - for ts, ph in [(host_begin, "B"), (host_end, "E")]: - append_event( - name=name, ph=ph, ts=ts * 1000, pid=pid, tid="host", args=args, cat=cat, - ) - for device, device_begin, device_end in device_list: - for ts, ph in [(device_begin(), "B"), (device_end(), "E")]: - append_event( - name=name, ph=ph, ts=ts * 1000, pid=pid, tid=str(device), args=args, - ) - with open("{}.chrome_timeline.json".format(path), "w") as f: - json.dump(trace_events, f, indent=2) - - -def _dump_compatible(entries: List[ProfileEntry], path: str): - obj = { - "graph_exec": {"var": [], "operator": {}}, - "profiler": {"device": {}, "host": {}, "opr_footprint": {}}, - } - var_list = obj["graph_exec"]["var"] - operator_dict = obj["graph_exec"]["operator"] - device_dict = obj["profiler"]["device"] - host_dict = obj["profiler"]["host"] - opr_foot_print_dict = obj["profiler"]["opr_footprint"] - - def add_var(var) -> int: - var_id = len(var_list) - var_list.append( - {"comp_node": str(var[2]),} - ) - return var_id - - for op_id, entry in enumerate(entries): - operator_dict[op_id] = { - "input": [add_var(var) for var in entry.inputs], - "output": [add_var(var) for var in entry.outputs], - "name": str(entry.op.ctype()), - "type": "imperative", - "id": entry.id, - } - op_device_dict = {} - for device, device_begin, device_end in entry.device_list: - op_device_dict[str(device)] = { - "start": device_begin(), - "kern": device_begin(), - "end": device_end(), - } - device_dict[op_id] = op_device_dict - host_begin, host_end = entry.host - host_dict[op_id] = { - "host": {"start": host_begin, "kern": host_begin, "end": host_end} - } - opr_footprint = { - "out_shapes": [oup[1] for oup in entry.outputs], - "in_shapes": [inp[1] for inp in entry.inputs], - "params": {}, - } - if entry.memory > 0: - opr_footprint["memory"] = entry.memory - if entry.computation > 0: - opr_footprint["computation"] = entry.computation - opr_foot_print_dict[op_id] = opr_footprint - with open("{}.compatible.json".format(path), "w") as f: - json.dump(obj, f, indent=2) - - -def _dump_graphviz(entries: List[ProfileEntry], path: str): - import json - - import graphviz - - graph = graphviz.Digraph() - graph.graph_attr["ordering"] = "out" - var_cache = {} - - def cache_var(var_id, var_shape): - if var_id not in var_cache: - var_name = "var({})".format(var_id) - var_label = "{}\nshape:{}\n".format(var_name, shape) - graph.node(var_name, var_label) - var_cache[var_id] = var_name - return var_cache[var_id] - - for op_id, entry in enumerate(entries): - op = entry.op - op_name = "op({})".format(op_id) - op_type = type(op).__name__ - op_attrs = Profiler.fetch_attrs(op) - label_lines = [] - if "param" in op_attrs: - del op_attrs["param"] - label_lines.append("{}:{}".format(op_name, op_type)) - for k, v in op_attrs.items(): - label_lines.append("attr[{}]: {}".format(k, v)) - op_param_str = entry.param - if len(op_param_str) > 0: - op_param = json.loads(op_param_str) - for k, v in op_param.items(): - label_lines.append("param[{}]:{}".format(k, v)) - host_begin, host_end = entry.host - label_lines.append("time[host]: {:f}ms".format(host_end - host_begin)) - for device, device_begin, device_end in entry.device_list: - device_time = device_end() - device_begin() - label_lines.append("time[{}]: {:f}ms".format(device, device_time)) - op_label = "\n".join(label_lines) - graph.node(op_name, op_label, shape="rectangle") - for var_id, shape, device in entry.inputs: - graph.edge(cache_var(var_id, shape), op_name) - for var_id, shape, device in entry.outputs: - graph.edge(op_name, cache_var(var_id, shape)) - graph.save("{}.graphviz.dot".format(path)) +from ..core._imperative_rt.core2 import ( + pop_scope, + push_scope, + start_profile, + stop_profile, + sync, +) class Profiler: @@ -181,85 +39,45 @@ class Profiler: # Only profile record of last iter would be saved with Profiler("profile"): # your code here - + # Then open the profile file in chrome timeline window """ - CHROME_TIMELINE = "chrome_timeline" - COMPATIBLE = "compatible" - GRAPHVIZ = "graphviz" - - WITH_FOOTPRINT = 1 + CHROME_TIMELINE = "chrome_timeline.json" - _type_map = { - OperatorNodeConfig: lambda x: _print_opnode_config(x), - bytes: lambda x: base64.encodebytes(x).decode("ascii"), - CollectiveComm.Mode: lambda x: str(x), - } - - _dumper_map = { - CHROME_TIMELINE: _dump_chrome_timeline, - COMPATIBLE: _dump_compatible, - GRAPHVIZ: _dump_graphviz, - } + COMMAND = 1 << 0 + OPERATOR = 1 << 1 + TENSOR_LIFETIME = 1 << 2 + TENSOR_PROP = 1 << 3 + SYNC = 1 << 4 + SCOPE = 1 << 5 + ALL = (1 << 6) - 1 def __init__( self, path: str = "profile", + format: str = CHROME_TIMELINE, *, - formats: Iterable[str] = (CHROME_TIMELINE,), - type_filter: str = ".*", - exit_dump: bool = True + topic=OPERATOR | SCOPE, + align_time=True, + show_operator_name=True ) -> None: - self._impl = _Profiler() self._path = path - - if isinstance(formats, str): - formats = (formats,) - - self._filter = type_filter - self._dumpers = [Profiler._dumper_map[fmt] for fmt in formats] - self._exit_dump = exit_dump + self._format = format + self._options = { + "topic": int(topic), + "align_time": int(align_time), + "show_operator_name": int(show_operator_name), + } def __enter__(self): - sync() - self._impl.start(Profiler.WITH_FOOTPRINT) + start_profile(self._options) return self def __exit__(self, val, tp, trace): - if self._exit_dump: - self.dump() - sync() - self._impl.stop() - self._impl.clear() - - @classmethod - def fetch_attrs(cls, op): - attrs = dir(op) - results = {} - for attr in attrs: - if attr.startswith("_"): - continue - value = op.__getattribute__(attr) - if callable(value): - continue - value_type = type(value) - if value_type in cls._type_map: - value = cls._type_map[value_type](value) - results[attr] = str(value) - return results - - def dump(self, path: Optional[str] = None): + stop_profile(self._path, self._format) + # dump is async, so it's necessary to sync interpreter sync() - raw = [ - entry - for entry in self._impl.dump() - if re.match(self._filter, type(entry.op).__name__) - ] - if path is None: - path = self._path - for dumper in self._dumpers: - dumper(raw, path) def __call__(self, func): def wrapper(*args, **kwargs): @@ -269,4 +87,23 @@ class Profiler: return wrapper +@contextmanager +def scope(name): + push_scope(name) + yield + pop_scope(name) + + profile = Profiler + + +def merge_trace_events(sources: List[str], target: str): + names = list(map(lambda x: x + ".chrome_timeline.json", sources)) + result = [] + for name in names: + with open(name, "r", encoding="utf-8") as f: + content = json.load(f) + for entry in content: + result.append(entry) + with open(target + ".chrome_timeline.json", "w") as f: + json.dump(result, f, ensure_ascii=False, indent=4) diff --git a/imperative/python/src/tensor.cpp b/imperative/python/src/tensor.cpp index 276280f46f221b62fa5a95f658d23873c746e234..2ab6a27f7c2ac490df9707a8d6af87613b600c58 100644 --- a/imperative/python/src/tensor.cpp +++ b/imperative/python/src/tensor.cpp @@ -807,16 +807,34 @@ void init_tensor(py::module m) { } } + m.def("set_option", + [](std::string name, int value){ interpreter_for_py->set_option(name, value); }); + m.def("get_option", + [](std::string name){ return interpreter_for_py->get_option(name); }); m.def("_set_swap_flag", - [](bool flag) { interpreter_for_py->set_swap_flag(flag); }); + [](bool flag) { interpreter_for_py->set_option("enable_swap", flag); }); m.def("_set_drop_flag", - [](bool flag) { interpreter_for_py->set_drop_flag(flag); }); + [](bool flag) { interpreter_for_py->set_option("enable_drop", flag); }); m.def("config_async_level", - [](int level) { interpreter_for_py->config_async_level(level); }); + [](int level) { + mgb_assert(level >= 0 and level <= 2, "async_level should be 0, 1 or 2"); + interpreter_for_py->set_option("async_level", level); + }); m.def("get_async_level", - []() { return interpreter_for_py->get_async_level(); }); + []() { return interpreter_for_py->get_option("async_level"); }); m.def("set_buffer_length", - [](int length) { interpreter_for_py->set_buffer_length(length); }); + [](int length) { + mgb_assert(length >= 0 and length < 100, "buffer_length should be in [0, 100)"); + interpreter_for_py->set_option("buffer_length", length); + }); + m.def("push_scope", + [](std::string name) { interpreter_for_py->push_scope(name); }); + m.def("pop_scope", + [](std::string name) { interpreter_for_py->pop_scope(name); }); + m.def("start_profile", + [](std::unordered_map option) { return interpreter_for_py->start_profile(option); }); + m.def("stop_profile", + [](std::string basename, std::string format) { interpreter_for_py->stop_profile(basename, format); }); m.def("sync", []() { interpreter_for_py->sync(); diff --git a/imperative/python/src/utils.cpp b/imperative/python/src/utils.cpp index b3e7c7f17a284f66240ce8058f412bcbbda93fc8..3852a95cddc99860ca1e58beec68131d1eae104e 100644 --- a/imperative/python/src/utils.cpp +++ b/imperative/python/src/utils.cpp @@ -200,33 +200,6 @@ void init_utils(py::module m) { m.def("_get_device_count", &mgb::CompNode::get_device_count, "Get total number of specific devices on this system"); - using mgb::imperative::ProfileEntry; - - py::class_(m, "ProfileEntry") - .def_readwrite("op", &ProfileEntry::op) - .def_readwrite("host", &ProfileEntry::host) - .def_readwrite("device_list", &ProfileEntry::device_list) - .def_readwrite("inputs", &ProfileEntry::inputs) - .def_readwrite("outputs", &ProfileEntry::outputs) - .def_readwrite("id", &ProfileEntry::id) - .def_readwrite("parent", &ProfileEntry::parent) - .def_readwrite("memory", &ProfileEntry::memory) - .def_readwrite("computation", &ProfileEntry::computation) - .def_property_readonly("param", [](ProfileEntry& self)->std::string{ - if(self.param){ - return self.param->to_string(); - } else { - return {}; - } - }); - - py::class_(m, "ProfilerImpl") - .def(py::init<>()) - .def("start", &mgb::imperative::Profiler::start) - .def("stop", &mgb::imperative::Profiler::stop) - .def("clear", &mgb::imperative::Profiler::clear) - .def("dump", &mgb::imperative::Profiler::get_profile); - using mgb::imperative::TensorSanityCheck; py::class_(m, "TensorSanityCheckImpl") .def(py::init<>()) diff --git a/imperative/python/test/integration/test_profiler.py b/imperative/python/test/integration/test_profiler.py new file mode 100644 index 0000000000000000000000000000000000000000..af8b5c469a85f33c7120da2c08398f4ea0fa8f22 --- /dev/null +++ b/imperative/python/test/integration/test_profiler.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +import json +import os + +import pytest + +from megengine import Parameter, tensor +from megengine.core import option +from megengine.module import Module +from megengine.utils.profiler import Profiler, scope + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter([1.23], dtype="float32") + + def forward(self, x): + x = x * self.a + return x + + +def test_profiler(): + profile_prefix = "pytest_profile" + profile_format = "chrome_timeline.json" + profile_path = "{}.{}".format(profile_prefix, profile_format) + with Profiler(profile_prefix, format=profile_format): + with scope("my_scope"): + oup = Simple()(tensor([1.23], dtype="float32")) + with open(profile_path, "r") as f: + events = json.load(f) + os.remove(profile_path) + prev_ts = {} + scope_count = 0 + for event in events: + if "dur" in event: + assert event["dur"] >= 0 + elif "ts" in event and "tid" in event: + ts = event["ts"] + tid = event["tid"] + if ts == 0: + continue + assert (tid not in prev_ts) or prev_ts[tid] <= ts + prev_ts[tid] = ts + if "name" in event and event["name"] == "my_scope": + scope_count += 1 + assert scope_count > 0 and scope_count % 2 == 0 diff --git a/imperative/src/impl/function_hook.h b/imperative/src/impl/function_hook.h index 70f3d39857ddef5df378894180dbff6fa9f4dfbf..7f4cff0532180c19c999fe5f557a8689ebd83e1e 100644 --- a/imperative/src/impl/function_hook.h +++ b/imperative/src/impl/function_hook.h @@ -17,52 +17,37 @@ namespace mgb { namespace imperative { template -class FunctionHooker; +class FunctionHook; -template -class FunctionHooker { +template