diff --git a/CMakeLists.txt b/CMakeLists.txt index 39bf70cd422d898442bbac16fd9b96ceb4063c16..2bb5d3b636309abe989ee780afa9db9b2775ac9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,10 +247,6 @@ if(MGE_BUILD_IMPERATIVE_RT) set(CMAKE_CXX_STANDARD 17) endif() -if(MGE_BUILD_IMPERATIVE_RT) - set(MGE_BUILD_SDK OFF) -endif() - if(NOT MGE_WITH_CUDA) message("-- Disable distributed support, as CUDA is not enabled.") set(MGE_WITH_DISTRIBUTED OFF) @@ -697,9 +693,7 @@ if(MGE_WITH_PYTHON_MODULE) endif() if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) - if(NOT MGE_BUILD_IMPERATIVE_RT) - add_subdirectory(test) - endif() + add_subdirectory(test) endif() if(TARGET mgb) diff --git a/dnn/CMakeLists.txt b/dnn/CMakeLists.txt index 6bdb2681723c6652a36ad6b5033d41c80f6abe52..7e2012d1abd62e8fc67cc46cd02e5d0c17b89f4a 100644 --- a/dnn/CMakeLists.txt +++ b/dnn/CMakeLists.txt @@ -66,9 +66,7 @@ if(MGE_WITH_CUDA) endif() if(MGE_WITH_TEST) - if(NOT MGE_BUILD_IMPERATIVE_RT) - add_subdirectory(test) - endif() + add_subdirectory(test) endif() add_subdirectory(src) diff --git a/imperative/.gitignore b/imperative/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..67074a4e4ec3ca9363645acabc71228213de65e6 --- /dev/null +++ b/imperative/.gitignore @@ -0,0 +1,5 @@ +Makefile +/test/imperative_test +*.so +/python/megengine/core/ops/_internal/generated_ops.py +/python/megengine/core/ops/_internal/param_defs.py diff --git a/imperative/CMakeLists.txt b/imperative/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..55a97a20f5ea2b2b92e33bb92ca87a31d76230a5 --- /dev/null +++ b/imperative/CMakeLists.txt @@ -0,0 +1,110 @@ +find_package(NumPy REQUIRED) + +set(PACKAGE_NAME megengine) +set(PACKAGE_NAME ${PACKAGE_NAME} PARENT_SCOPE) +set(MODULE_NAME _imperative_rt) +set(MODULE_NAME ${MODULE_NAME} PARENT_SCOPE) +file(GLOB_RECURSE SRCS src/impl/*.cpp src/include/*.h python/src/*.cpp python/src/*.h) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMGB_WITH_IMPERATIVE=1") + +file(GLOB_RECURSE OPR_DECL_SRCS "${PROJECT_SOURCE_DIR}/src/**/*.oprdecl") +file(GLOB_RECURSE PYTHON_SRCS python/${PACKAGE_NAME}/*.py) +list(REMOVE_ITEM PYTHON_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/ops/_internal/generated_ops.py ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/ops/_internal/param_defs.py) +file(GLOB_RECURSE ALL_HEADERS src/cpp/megbrain_pubapi.h + ${PROJECT_SOURCE_DIR}/src/core/include/* + ${PROJECT_SOURCE_DIR}/src/opr/include/* + ${PROJECT_SOURCE_DIR}/src/serialization/include/* + ${PROJECT_SOURCE_DIR}/src/plugin/include/* + ${PROJECT_SOURCE_DIR}/dnn/include/*) + +set(MEGENGINE_DIR ${CMAKE_CURRENT_BINARY_DIR}/python/) +set(GEN_OPS_DIR ${MEGENGINE_DIR}/${PACKAGE_NAME}/core/ops/_internal) +file(MAKE_DIRECTORY ${GEN_OPS_DIR}) +set(GEN_OPS_FILE ${GEN_OPS_DIR}/generated_ops.py) +set(GEN_OP_PARAMS_FILE ${MEGENGINE_DIR}/${PACKAGE_NAME}/core/ops/_internal/param_defs.py) +set(GEN_OP_PARAMS_TEMPLATE ${CMAKE_CURRENT_SOURCE_DIR}/python/tools/ops.tpl.py) + +##################### generate python opr_param_defs.py ############## + +file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS) +file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/opr_param_defs.py ${CONTENTS}) + +add_custom_command( + OUTPUT ${GEN_OPS_FILE} + COMMAND ${CMAKE_COMMAND} -E touch ${MEGENGINE_DIR}/${PACKAGE_NAME}/core/${MODULE_NAME}.so ${GEN_OPS_FILE} ${GEN_OP_PARAMS_FILE} + COMMAND ${CMAKE_COMMAND} -E copy_directory 
${CMAKE_CURRENT_SOURCE_DIR}/python/${PACKAGE_NAME} ${MEGENGINE_DIR}/${PACKAGE_NAME} + COMMAND ${CMAKE_COMMAND} -E remove -f ${MEGENGINE_DIR}/${PACKAGE_NAME}/core/${MODULE_NAME}.so ${GEN_OPS_FILE} ${GEN_OP_PARAMS_FILE} + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/python/tools/gen_ops.py ${OPR_DECL_SRCS} -o ${GEN_OPS_FILE} + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${MEGENGINE_DIR}/${PACKAGE_NAME}/test + COMMAND ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py -t py --imperative ${CMAKE_CURRENT_BINARY_DIR}/opr_param_defs.py ${GEN_OP_PARAMS_FILE} + DEPENDS ${OPR_DECL_SRCS} ${PYTHON_SRCS} ${ALL_HEADERS} ${GEN_OP_PARAMS_TEMPLATE} + VERBATIM +) + +add_custom_target(gen_opr_py DEPENDS ${GEN_OPS_FILE}) + +##################### generate opdef c header and python binding ############## + +set(OP_DEF_HEADER_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/include) +file(MAKE_DIRECTORY ${OP_DEF_HEADER_OUT_DIR}/megbrain/imperative/opdef) +set(OP_DEF_HEADER ${OP_DEF_HEADER_OUT_DIR}/megbrain/imperative/opdef/all.h) +set(OP_DEF_PYTHON_BINDING_OUT_DIR ${MEGENGINE_DIR}/${PACKAGE_NAME}/src) +file(MAKE_DIRECTORY ${OP_DEF_PYTHON_BINDING_OUT_DIR}) +set(OP_DEF_PYTHON_BINDING ${OP_DEF_PYTHON_BINDING_OUT_DIR}/opdef.inl) +set(OP_PARAM_DEF ${CMAKE_CURRENT_BINARY_DIR}/opr_param_defs.py) +set(GEN_OP_DEF_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/python/tools/gen_op_defs.py) + +add_custom_command( + OUTPUT ${OP_DEF_HEADER} ${OP_DEF_PYTHON_BINDING} + COMMAND ${PYTHON_EXECUTABLE} ${GEN_OP_DEF_SCRIPT} ${OP_PARAM_DEF} ${OP_DEF_HEADER} + COMMAND ${PYTHON_EXECUTABLE} ${GEN_OP_DEF_SCRIPT} -t py ${OP_PARAM_DEF} ${OP_DEF_PYTHON_BINDING} + DEPENDS ${GEN_OP_DEF_SCRIPT} ${OP_PARAM_DEF} + VERBATIM +) + +add_custom_target(gen_op_def_internal DEPENDS ${OP_DEF_HEADER} ${OP_DEF_PYTHON_BINDING}) +add_library(gen_op_def INTERFACE) +target_include_directories(gen_op_def INTERFACE ${OP_DEF_HEADER_OUT_DIR} ${OP_DEF_PYTHON_BINDING_OUT_DIR}) +add_dependencies(gen_op_def gen_op_def_internal) + +##################### end of opdef generation ######################### + +set(VERSION_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/version.ld) +add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT}) + +add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) +pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) +target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) +if (MGE_WITH_DISTRIBUTED) + message("Imperative configured to link megray") + target_link_libraries(${MODULE_NAME} PRIVATE megray) +endif() +target_include_directories(${MODULE_NAME} PUBLIC src/include PRIVATE ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR}) +target_compile_definitions(${MODULE_NAME} PRIVATE MODULE_NAME=${MODULE_NAME}) +target_compile_options(${MODULE_NAME} PRIVATE -Wno-unused-parameter) +if(CXX_SUPPORT_WCLASS_MEMACCESS) + target_compile_options(${MODULE_NAME} PRIVATE "-Wno-class-memaccess") +endif() +set_target_properties(${MODULE_NAME} PROPERTIES + SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} + LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core +) +add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) + +if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) + add_subdirectory(test) +endif() + +add_custom_command( + TARGET ${MODULE_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_SOURCE_DIR}/ACKNOWLEDGMENTS ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E copy_directory 
${CMAKE_CURRENT_SOURCE_DIR}/python/megengine ${CMAKE_CURRENT_BINARY_DIR}/python/megengine
+    COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${CMAKE_CURRENT_BINARY_DIR}/python/test
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_CURRENT_BINARY_DIR}/python/setup.py
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/requires.txt ${CMAKE_CURRENT_BINARY_DIR}/python/requires.txt
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/requires-style.txt ${CMAKE_CURRENT_BINARY_DIR}/python/requires-style.txt
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/requires-test.txt ${CMAKE_CURRENT_BINARY_DIR}/python/requires-test.txt
+)
+
diff --git a/imperative/python/megengine/__init__.py b/imperative/python/megengine/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f27cdc7270dfb0dd99f640611906e4b0d7a03757
--- /dev/null
+++ b/imperative/python/megengine/__init__.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import os
+import sys
+
+from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func
+from .device import *
+from .logger import enable_debug_log, get_logger, set_log_file, set_log_level
+from .serialization import load, save
+from .tensor import Tensor, tensor
+from .tensor_nn import Buffer, Parameter
+from .version import __version__
+
+_set_fork_exec_path_for_timed_func(
+    sys.executable,
+    os.path.join(os.path.dirname(__file__), "utils", "_timed_func_fork_exec_entry.py"),
+)
+
+del _set_fork_exec_path_for_timed_func
diff --git a/imperative/python/megengine/core/__init__.py b/imperative/python/megengine/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e24057552a33f3d62428ae53ad0a4f17186b9e5b
--- /dev/null
+++ b/imperative/python/megengine/core/__init__.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import os
+import sys
+
+from .tensor import Tensor
diff --git a/imperative/python/megengine/core/_wrap.py b/imperative/python/megengine/core/_wrap.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4bf756440cb0654cdb08917376075abe3f2e528
--- /dev/null
+++ b/imperative/python/megengine/core/_wrap.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
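+# Thin Python wrapper around the C++ CompNode handle: Device normalizes
+# str / CompNode / Device inputs into one hashable, comparable object, and
+# device() below is the coercion helper used by the rest of the package.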
+import numpy as np
+
+from ._imperative_rt import CompNode
+
+
+class Device:
+    def __init__(self, device=None):
+        if device is None:
+            self._cn = CompNode()
+        elif isinstance(device, Device):
+            self._cn = device._cn
+        elif isinstance(device, CompNode):
+            self._cn = device
+        else:
+            self._cn = CompNode(device)
+
+    def to_c(self):
+        return self._cn
+
+    def __repr__(self):
+        return "{}({})".format(type(self).__qualname__, self)
+
+    def __str__(self):
+        return str(self._cn)
+
+    def __hash__(self):
+        return hash(str(self._cn))
+
+    def __eq__(self, rhs):
+        if not isinstance(rhs, Device):
+            rhs = Device(rhs)
+        return str(self._cn) == str(rhs._cn)
+
+
+def device(obj):
+    if isinstance(obj, Device):
+        return obj
+    return Device(obj)
diff --git a/imperative/python/megengine/core/autodiff/__init__.py b/imperative/python/megengine/core/autodiff/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1207b5d98cd3578bc39e9ce600a1254a434880c8
--- /dev/null
+++ b/imperative/python/megengine/core/autodiff/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
diff --git a/imperative/python/megengine/core/autodiff/builtin_op_utils.py b/imperative/python/megengine/core/autodiff/builtin_op_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..54d959d173ee437111e5d7bb79df0acb22a4edaf
--- /dev/null
+++ b/imperative/python/megengine/core/autodiff/builtin_op_utils.py
@@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
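+# Maps a builtin OpDef to its backward implementation: the generic path asks
+# imperative.make_backward_graph for a backward graph, while elemwise add and
+# reshape are special-cased with the hand-written gradients defined below.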
+import functools
+import itertools
+
+import numpy as np
+
+from .._imperative_rt import TensorAttr, imperative
+from ..ops.builtin import Elemwise, GetVarShape, OpDef, OprAttr, Reduce, Reshape
+from ..tensor.core import apply
+from ..tensor.function import Function
+
+
+@functools.singledispatch
+def builtin_op_get_backward_fn(op: OpDef, inputs, outputs, input_requires_grad):
+    assert 0
+
+
+_elemwise_add_param = Elemwise(mode="add").to_c().param
+
+
+@builtin_op_get_backward_fn.register(OpDef)
+def _(op: OpDef, inputs, outputs, input_requires_grad):
+    if (
+        isinstance(op, OprAttr)
+        and op.type == "Elemwise"
+        and op.param == _elemwise_add_param
+    ):
+        grad_fn = elemwise_grad_fn
+    elif isinstance(op, OprAttr) and op.type == Reshape.name:
+        grad_fn = reshape_grad_fn
+    else:
+        grad_fn = default_grad_fn
+    return grad_fn(op, inputs, outputs, input_requires_grad)
+
+
+@builtin_op_get_backward_fn.register(Function)
+def _(op: Function, inputs, outputs, input_requires_grad):
+    return op.get_backward_fn(), [True,] * len(outputs)
+
+
+def default_grad_fn(op, inputs, outputs, input_requires_grad):
+    def get_tensor_attr(x):
+        attr = TensorAttr()
+        attr.dtype = x.dtype
+        attr.comp_node = x.device.to_c()
+        return attr
+
+    output_has_grads = [True,] * len(outputs)
+    result = imperative.make_backward_graph(
+        op, list(map(get_tensor_attr, inputs)), input_requires_grad, output_has_grads
+    )
+    if result is None:
+        nr_inputs = len(inputs)
+        nr_outputs = len(outputs)
+
+        def backward(*args):
+            return nr_inputs * [
+                None,
+            ]
+
+        return backward, nr_outputs * [False,]
+    backward_graph, save_for_backward_mask, input_has_grad = result
+
+    input_output_mask = save_for_backward_mask[: len(inputs + outputs)]
+    output_grad_mask = save_for_backward_mask[len(inputs + outputs) :]
+    save_for_backward = tuple(
+        val for val, mask in zip(inputs + outputs, input_output_mask) if mask
+    )
+    del inputs
+    del outputs
+
+    def backward(*args):
+        output_grads = tuple(val for val, mask in zip(args, output_grad_mask) if mask)
+        assert None not in output_grads
+        ret = iter(apply(backward_graph, *(save_for_backward + output_grads)))
+        return tuple(next(ret) if mask else None for mask in input_has_grad)
+
+    return backward, output_grad_mask
+
+
+# override for elemwise
+def elemwise_grad_fn(op, inputs, outputs, input_requires_grad):
+    assert len(inputs) == len(input_requires_grad) == 2
+
+    def get_shape(x):
+        (s,) = apply(GetVarShape(), x)
+        return s
+
+    input_shapes = [
+        get_shape(x) if i else None for i, x in zip(input_requires_grad, inputs)
+    ]
+
+    def reduce_to(x, s):
+        (y,) = apply(Reduce(), x, s)
+        return y
+
+    def backward(dy):
+        return tuple(
+            reduce_to(dy, s) if i else None
+            for i, s in zip(input_requires_grad, input_shapes)
+        )
+
+    return backward, [True]
+
+
+def reshape_grad_fn(op, inputs, outputs, input_requires_grad):
+    assert len(inputs) == len(input_requires_grad) == 2
+
+    def get_shape(x):
+        (s,) = apply(GetVarShape(), x)
+        return s
+
+    input_shapes = [
+        get_shape(x) if i else None for i, x in zip(input_requires_grad, inputs)
+    ]
+
+    def reshape_to(dy, s):
+        (dx,) = apply(Reshape(), dy, s)
+        return dx
+
+    def backward(dy):
+        return tuple(
+            reshape_to(dy, s) if i else None
+            for i, s in zip(input_requires_grad, input_shapes)
+        )
+
+    return backward, [True]
diff --git a/imperative/python/megengine/core/autodiff/grad.py b/imperative/python/megengine/core/autodiff/grad.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b1b337685adb179f51c6f3e160507e0cbaa0a97
--- /dev/null
+++ b/imperative/python/megengine/core/autodiff/grad.py
@@ -0,0 +1,390 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import functools
+import heapq
+import itertools
+import typing
+import weakref
+
+import numpy as np
+
+from ..ops.builtin import Elemwise, OpDef
+from ..ops.special import Const
+from ..tensor.core import TensorBase, TensorWrapperBase, apply
+from ..tensor.function import Function
+from ..tensor.tensor import Tensor, get_context
+from . import builtin_op_utils
+
+""" Some notes:
+    1. Initialize the optimizer:
+        for each trainable parameter:
+            call wrt(param, callback)
+        Each parameter tensor will be associated with a Tracer object saved in Tensor._extra_data
+    2. Tracer has one member: node, which is a VariableNode
+    3. VariableNode has an OpNode member: opnode
+    4. OpNode has six members:
+        a. id
+        b. inputs, which is made of VariableNode
+        c. outputs, which are weakrefs to VariableNode
+        d. backward: the backward callback function
+        e. has_grad_fn: call has_grad_fn(opnode, reached) to check whether a grad exists
+        f. backward_allow_noinput: whether backward allows all output grads to be missing
+
+"""
+
+_grad_count = 0
+_grad_manager_dict = weakref.WeakValueDictionary()
+
+
+def get_grad_managers():
+    return [_grad_manager_dict[key] for key in _grad_manager_dict]
+
+
+def add(a, b):
+    (c,) = apply(Elemwise(mode="add"), a, b)
+    return c
+
+
+def get_tensor(x):
+    # recursively unwrap wrapper types until a raw Tensor is reached
+    if isinstance(x, Tensor):
+        return x
+    try:
+        x = x.__wrapped__
+    except AttributeError:
+        raise TypeError(type(x))
+    return get_tensor(x)
+
+
+class Grad:
+    def __init__(self, name=None):
+
+        if name is None:
+            global _grad_count
+            self._name = "grad_" + str(_grad_count)
+            _grad_count += 1
+        else:
+            self._name = name
+        assert self._name not in _grad_manager_dict, "grad manager name duplicated"
+        _grad_manager_dict[self._name] = self
+
+        # list of all x in partial(y) / partial(x)
+        self.xs = []
+
+        # contains weak references to every OpNode created during forward;
+        # an OpNode holds inputs, outputs and its backward function, so
+        # together these OpNodes form the computational graph
+        self.ops = []
+
+        self._enabled = True
+
+    @property
+    def name(self):
+        return self._name
+
+    def wrt(self, *args: Tensor, callback=None):
+        """ Indicates the loss is a function of the input tensors (usually the net trainable parameters),
+        i.e., d (loss) / d (Tensor) != 0
+
+        callback is used to perform additional operations after the gradient is obtained in backward,
+        e.g., copying the grad to a particular place.
+
+        A VariableNode will be created and saved in the tensor's _extra_data slot.
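+
+        Example:
+            grad.wrt(param, callback=lambda tensor, grad: setattr(tensor, "grad", grad))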
+ """ + + for x in map(get_tensor, args): + v = self._new_variable(x, callback=callback) + assert self not in x._extra_data + x._extra_data[self] = Tracer(v) + self.xs.append(v) + + return self + + def _new_variable(self, owner, opnode=None, callback=None): + return VariableNode(self, owner, opnode=opnode, callback=callback) + + def _new_opnode(self, inputs, outputs): + inputs = tuple(inputs) + for i in inputs: + assert i is None or isinstance(i, VariableNode) + o = OpNode() + o.inputs = inputs + o.outputs = [] + tracers = [] + for i in outputs: + assert isinstance(i, Tensor) + v = self._new_variable(i, o) + o.outputs.append(weakref.ref(v)) + tracers.append(Tracer(v)) + self.ops.append(weakref.ref(o)) + return o, tracers + + def copy(self): + raise NotImplementedError + + def __enter__(self): + return self + + def __exit__(self, *_): + """clear all resources""" + self._enabled = False + for o in self.ops: + o = o() + if o: + o.clear() + + def __call__(self, ys, dys): + """ Defines Grad(). + + :param ys: outputs of forward operators, e.g., the loss tensor + :type ys: list of Tensor or TensorWrapperBase + :param dys: delta of outputs, physically equivalent to sensitivity of outputs to the loss, + e.g., one for the loss itself + :type dys: list of Tensor or TensorWrapperBase + """ + assert self._enabled + self._enabled = False + + def check_wrapper(): + if isinstance(dys, TensorWrapperBase): + return type(dys) + if isinstance(dys, TensorBase): + return + assert isinstance(dys, (tuple, list)) + for i in dys: + if isinstance(i, TensorWrapperBase): + return type(i) + + Wrapper = check_wrapper() + + def aslist(x): + if isinstance(x, (Tensor, TensorWrapperBase)): + x = [x] + else: + x = list(x) + x = [i.__wrapped__ if isinstance(i, TensorWrapperBase) else i for i in x] + for i in x: + assert isinstance(i, Tensor) + return x + + ys = aslist(ys) + dys = aslist(dys) + assert len(ys) == len(dys) + + # ys is changed to a list of VariableNode which contains more information + # such as OpNode, callback, etc. 
+ ys = [i._extra_data[self].node for i in ys] + + # NOTE: callback is called only if grad is not None + + # the OpNode sequence in backward + op_seq = [] + + # VariableNode -> (i, j), where i is time stamp in backward, j means jth input + last_written_to = {} + + def schedule(): + reached = set(ys) + # i is the time stamp in backward + i = 0 + for o in self.ops[::-1]: + o = o() + if o is None: + continue + + if not o.has_grad_fn(o, reached): + continue + op_seq.append(o) + for j, v in enumerate(o.inputs): + reached.add(v) + last_written_to[v] = i, j + i += 1 + + schedule() + + # VariableNode -> Tensor + cache = {} + + def initialize(): + for y, dy in zip(ys, dys): + cache[y] = dy + if y not in last_written_to and y.callback: + y.callback(y.owner(), dy) + + initialize() + + # NOTE: None is used to mark a node has been consumed + + for seqno, opnode in enumerate(op_seq): + input_nodes = opnode.inputs + output_nodes = [i() for i in opnode.outputs] + backward = opnode.backward + backward_allow_noinput = opnode.backward_allow_noinput + opnode.clear() + + output_grads = [] + for i in output_nodes: + if i is not None: + if i in cache: + assert cache[i] is not None + output_grads.append(cache[i]) + else: + output_grads.append(None) + # read by backward, mark consumed + cache[i] = None + else: + output_grads.append(None) + if ( + any([grad is not None for grad in output_grads]) + or backward_allow_noinput + ): + input_grads = backward(*output_grads) + else: + input_grads = [None] * len(input_nodes) + + assert len(input_nodes) == len(input_grads) + for i, (v, g) in enumerate(zip(input_nodes, input_grads)): + if v is None: + continue + if v in cache: + assert cache[v] + if g is not None: + cache[v] = add(cache[v], g) + elif g is not None: + cache[v] = g + if last_written_to[v] == (seqno, i): + if v.callback: + v.callback( + v.owner(), Wrapper(cache[v]) if Wrapper else cache[v] + ) + if v.opnode is None: + # won't read by backward, mark consumed + cache[v] = None + + for v in cache.values(): + assert v is None + + +class clearable: + __cleared = False + + def __bool__(self): + return not self.__cleared + + def clear(self): + self.__dict__.clear() + self.__cleared = True + + +class OpNode(clearable): + """ OpNode saves all the information to form the computational graph. + """ + + def __init__(self): + self.id = None + self.inputs = None # Could be VariableNode + self.outputs = None # Could be VariableNode + self.backward = None + self.has_grad_fn = None + self.backward_allow_noinput = False + + +class VariableNode(clearable): + """ VariableNode saves OpNode and callback. + FIXME!!! 
Explain manager and owner + """ + + def __init__(self, manager, owner, opnode=None, callback=None): + # manager is Grad type + self.manager = weakref.ref(manager) + # owner is Tensor type + self.owner = weakref.ref(owner) + self.opnode = opnode + self.callback = callback + + +class Tracer(clearable, TensorBase): + def __init__(self, node=None): + """ type(node) is VariableNode + """ + self.node = node + + +@functools.singledispatch +def check_backward_allow_noinput(op: OpDef): + return False + + +@functools.singledispatch +def get_op_has_grad_fn(op: OpDef): + assert 0 + + +@get_op_has_grad_fn.register(OpDef) +def _(op: OpDef): + return default_has_grad_fn + + +@get_op_has_grad_fn.register(Function) +def _(op: Function): + return default_has_grad_fn + + +def default_has_grad_fn(opnode, reached): + for v in opnode.outputs: + if v() in reached: + return True + return False + + +@apply.add +def tracer_apply(op: (OpDef, Function), *args: typing.Optional[Tracer]): + args = tuple(i if isinstance(i, Tracer) else None for i in args) + input_requires_grad = list(map(bool, args)) + if not any(input_requires_grad): + return + + ctx = get_context() + manager = None + assert len(ctx.inputs) == len(args) + for i, j in zip(ctx.inputs, args): + if j: + j = j.node + assert i is j.owner() + if manager is None: + manager = j.manager() + assert manager + else: + assert manager is j.manager() + + if not manager._enabled: + return + + opnode, outputs = manager._new_opnode([i and i.node for i in args], ctx.outputs) + + # register backward method + # tuple of backward functions corresponding to dy / dx_i + # None means y is not a function of x_i + opnode.backward, output_need_grad = builtin_op_utils.builtin_op_get_backward_fn( + op, ctx.inputs, ctx.outputs, input_requires_grad + ) + + assert len(outputs) == len(output_need_grad) + outputs = [x if y else None for (x, y) in zip(outputs, output_need_grad)] + + opnode.backward_allow_noinput = check_backward_allow_noinput(op) + + opnode.has_grad_fn = get_op_has_grad_fn(op) + + return tuple(outputs) + + +@apply.add +def _(op: Const, *_: typing.Optional[Tracer]): + return None diff --git a/imperative/python/megengine/core/ops/__init__.py b/imperative/python/megengine/core/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1207b5d98cd3578bc39e9ce600a1254a434880c8 --- /dev/null +++ b/imperative/python/megengine/core/ops/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/imperative/python/megengine/core/ops/_internal/__init__.py b/imperative/python/megengine/core/ops/_internal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1207b5d98cd3578bc39e9ce600a1254a434880c8 --- /dev/null +++ b/imperative/python/megengine/core/ops/_internal/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
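Taken together, grad.py implements a callback-driven backward pass: wrt() tags the leaves, tracer_apply records one OpNode per forward op, and Grad.__call__ replays those OpNodes in reverse. A minimal usage sketch follows; it is hypothetical and not part of the patch, assuming the built package exposes megengine.tensor and that the tensor wrapper supports * and .sum():

```python
import numpy as np

import megengine as mge
from megengine.core.autodiff.grad import Grad

w = mge.tensor(np.random.randn(3).astype("float32"))
x = mge.tensor(np.ones(3, dtype="float32"))

grads = {}
with Grad() as grad:
    # callback receives (owning tensor, gradient) once the grad is final
    grad.wrt(w, callback=lambda t, g: grads.__setitem__("w", g))
    loss = (w * x).sum()
    # one dy per y; a ones tensor is the sensitivity of the scalar loss to itself
    grad(loss, mge.tensor(np.ones((), dtype="float32")))

print(grads["w"])  # d(loss)/d(w), equal to x for this loss
```

Because gradients are delivered through callbacks as soon as last_written_to marks them final, grad() returns nothing; results are captured by side effect.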
diff --git a/imperative/python/megengine/core/ops/_internal/all_ops.py b/imperative/python/megengine/core/ops/_internal/all_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..f1627ee978cbcc459535da8ba1af93821be6e46a --- /dev/null +++ b/imperative/python/megengine/core/ops/_internal/all_ops.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .generated_ops import * +from .misc_ops import * diff --git a/imperative/python/megengine/core/ops/_internal/enum36.py b/imperative/python/megengine/core/ops/_internal/enum36.py new file mode 100644 index 0000000000000000000000000000000000000000..1fb4bb6f424ddc270a35b3412917882929a18517 --- /dev/null +++ b/imperative/python/megengine/core/ops/_internal/enum36.py @@ -0,0 +1,929 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import sys +from functools import reduce +from operator import or_ as _or_ +from types import DynamicClassAttribute, MappingProxyType + +# try _collections first to reduce startup cost +try: + from _collections import OrderedDict +except ImportError: + from collections import OrderedDict + + +__all__ = [ + "EnumMeta", + "Enum", + "IntEnum", + "Flag", + "IntFlag", + "auto", + "unique", +] + + +def _is_descriptor(obj): + """Returns True if obj is a descriptor, False otherwise.""" + return ( + hasattr(obj, "__get__") or hasattr(obj, "__set__") or hasattr(obj, "__delete__") + ) + + +def _is_dunder(name): + """Returns True if a __dunder__ name, False otherwise.""" + return ( + name[:2] == name[-2:] == "__" + and name[2:3] != "_" + and name[-3:-2] != "_" + and len(name) > 4 + ) + + +def _is_sunder(name): + """Returns True if a _sunder_ name, False otherwise.""" + return ( + name[0] == name[-1] == "_" + and name[1:2] != "_" + and name[-2:-1] != "_" + and len(name) > 2 + ) + + +def _make_class_unpicklable(cls): + """Make the given class un-picklable.""" + + def _break_on_call_reduce(self, proto): + raise TypeError("%r cannot be pickled" % self) + + cls.__reduce_ex__ = _break_on_call_reduce + cls.__module__ = "" + + +_auto_null = object() + + +class auto: + """ + Instances are replaced with an appropriate value in Enum class suites. + """ + + value = _auto_null + + +class _EnumDict(dict): + """Track enum member order and ensure member names are not reused. + + EnumMeta will use the names found in self._member_names as the + enumeration member names. + + """ + + def __init__(self): + super().__init__() + self._member_names = [] + self._last_values = [] + + def __setitem__(self, key, value): + """Changes anything not dundered or not a descriptor. + + If an enum member name is used twice, an error is raised; duplicate + values are not checked for. + + Single underscore (sunder) names are reserved. 
+ + """ + if _is_sunder(key): + if key not in ( + "_order_", + "_create_pseudo_member_", + "_generate_next_value_", + "_missing_", + ): + raise ValueError("_names_ are reserved for future Enum use") + if key == "_generate_next_value_": + setattr(self, "_generate_next_value", value) + elif _is_dunder(key): + if key == "__order__": + key = "_order_" + elif key in self._member_names: + # descriptor overwriting an enum? + raise TypeError("Attempted to reuse key: %r" % key) + elif not _is_descriptor(value): + if key in self: + # enum overwriting a descriptor? + raise TypeError("%r already defined as: %r" % (key, self[key])) + if isinstance(value, auto): + if value.value == _auto_null: + value.value = self._generate_next_value( + key, 1, len(self._member_names), self._last_values[:] + ) + value = value.value + self._member_names.append(key) + self._last_values.append(value) + super().__setitem__(key, value) + + +# Dummy value for Enum as EnumMeta explicitly checks for it, but of course +# until EnumMeta finishes running the first time the Enum class doesn't exist. +# This is also why there are checks in EnumMeta like `if Enum is not None` +Enum = None + + +class EnumMeta(type): + """Metaclass for Enum""" + + @classmethod + def __prepare__(metacls, cls, bases): + # create the namespace dict + enum_dict = _EnumDict() + # inherit previous flags and _generate_next_value_ function + member_type, first_enum = metacls._get_mixins_(bases) + if first_enum is not None: + enum_dict["_generate_next_value_"] = getattr( + first_enum, "_generate_next_value_", None + ) + return enum_dict + + def __new__(metacls, cls, bases, classdict): + # an Enum class is final once enumeration items have been defined; it + # cannot be mixed with other types (int, float, etc.) if it has an + # inherited __new__ unless a new __new__ is defined (or the resulting + # class will fail). + member_type, first_enum = metacls._get_mixins_(bases) + __new__, save_new, use_args = metacls._find_new_( + classdict, member_type, first_enum + ) + + # save enum items into separate mapping so they don't get baked into + # the new class + enum_members = {k: classdict[k] for k in classdict._member_names} + for name in classdict._member_names: + del classdict[name] + + # adjust the sunders + _order_ = classdict.pop("_order_", None) + + # check for illegal enum names (any others?) + invalid_names = set(enum_members) & { + "mro", + } + if invalid_names: + raise ValueError( + "Invalid enum member name: {0}".format(",".join(invalid_names)) + ) + + # create a default docstring if one has not been provided + if "__doc__" not in classdict: + classdict["__doc__"] = "An enumeration." + + # create our new Enum type + enum_class = super().__new__(metacls, cls, bases, classdict) + enum_class._member_names_ = [] # names in definition order + enum_class._member_map_ = OrderedDict() # name->value map + enum_class._member_type_ = member_type + + # save attributes from super classes so we know if we can take + # the shortcut of storing members in the class dict + base_attributes = {a for b in enum_class.mro() for a in b.__dict__} + + # Reverse value->name map for hashable values. + enum_class._value2member_map_ = {} + + # If a custom type is mixed into the Enum, and it does not know how + # to pickle itself, pickle.dumps will succeed but pickle.loads will + # fail. Rather than have the error show up later and possibly far + # from the source, sabotage the pickle protocol for this class so + # that pickle.dumps also fails. 
+ # + # However, if the new class implements its own __reduce_ex__, do not + # sabotage -- it's on them to make sure it works correctly. We use + # __reduce_ex__ instead of any of the others as it is preferred by + # pickle over __reduce__, and it handles all pickle protocols. + if "__reduce_ex__" not in classdict: + if member_type is not object: + methods = ( + "__getnewargs_ex__", + "__getnewargs__", + "__reduce_ex__", + "__reduce__", + ) + if not any(m in member_type.__dict__ for m in methods): + _make_class_unpicklable(enum_class) + + # instantiate them, checking for duplicates as we go + # we instantiate first instead of checking for duplicates first in case + # a custom __new__ is doing something funky with the values -- such as + # auto-numbering ;) + for member_name in classdict._member_names: + value = enum_members[member_name] + if not isinstance(value, tuple): + args = (value,) + else: + args = value + if member_type is tuple: # special case for tuple enums + args = (args,) # wrap it one more time + if not use_args: + enum_member = __new__(enum_class) + if not hasattr(enum_member, "_value_"): + enum_member._value_ = value + else: + enum_member = __new__(enum_class, *args) + if not hasattr(enum_member, "_value_"): + if member_type is object: + enum_member._value_ = value + else: + enum_member._value_ = member_type(*args) + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member._value_ == enum_member._value_: + enum_member = canonical_member + break + else: + # Aliases don't appear in member names (only in __members__). + enum_class._member_names_.append(member_name) + # performance boost for any member that would not shadow + # a DynamicClassAttribute + if member_name not in base_attributes: + setattr(enum_class, member_name, enum_member) + # now add to _member_map_ + enum_class._member_map_[member_name] = enum_member + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_[value] = enum_member + except TypeError: + pass + + # double check that repr and friends are not the mixin's or various + # things break (such as pickle) + for name in ("__repr__", "__str__", "__format__", "__reduce_ex__"): + class_method = getattr(enum_class, name) + obj_method = getattr(member_type, name, None) + enum_method = getattr(first_enum, name, None) + if obj_method is not None and obj_method is class_method: + setattr(enum_class, name, enum_method) + + # replace any other __new__ with our own (as long as Enum is not None, + # anyway) -- again, this is to support pickle + if Enum is not None: + # if the user defined their own __new__, save it before it gets + # clobbered in case they subclass later + if save_new: + enum_class.__new_member__ = __new__ + enum_class.__new__ = Enum.__new__ + + # py3 support for definition order (helps keep py2/py3 code in sync) + if _order_ is not None: + if isinstance(_order_, str): + _order_ = _order_.replace(",", " ").split() + if _order_ != enum_class._member_names_: + raise TypeError("member order does not match _order_") + + return enum_class + + def __bool__(self): + """ + classes/types should always be True. 
+ """ + return True + + def __call__( + cls, value, names=None, *, module=None, qualname=None, type=None, start=1 + ): + """Either returns an existing member, or creates a new enum class. + + This method is used both when an enum class is given a value to match + to an enumeration member (i.e. Color(3)) and for the functional API + (i.e. Color = Enum('Color', names='RED GREEN BLUE')). + + When used for the functional API: + + `value` will be the name of the new class. + + `names` should be either a string of white-space/comma delimited names + (values will start at `start`), or an iterator/mapping of name, value pairs. + + `module` should be set to the module this class is being created in; + if it is not set, an attempt to find that module will be made, but if + it fails the class will not be picklable. + + `qualname` should be set to the actual location this class can be found + at in its module; by default it is set to the global scope. If this is + not correct, unpickling will fail in some circumstances. + + `type`, if set, will be mixed in as the first base class. + + """ + if names is None: # simple value lookup + return cls.__new__(cls, value) + # otherwise, functional API: we're creating a new Enum type + return cls._create_( + value, names, module=module, qualname=qualname, type=type, start=start + ) + + def __contains__(cls, member): + return isinstance(member, cls) and member._name_ in cls._member_map_ + + def __delattr__(cls, attr): + # nicer error message when someone tries to delete an attribute + # (see issue19025). + if attr in cls._member_map_: + raise AttributeError("%s: cannot delete Enum member." % cls.__name__) + super().__delattr__(attr) + + def __dir__(self): + return [ + "__class__", + "__doc__", + "__members__", + "__module__", + ] + self._member_names_ + + def __getattr__(cls, name): + """Return the enum member matching `name` + + We use __getattr__ instead of descriptors or inserting into the enum + class' __dict__ in order to support `name` and `value` being both + properties for enum members (which live in the class' __dict__) and + enum members themselves. + + """ + if _is_dunder(name): + raise AttributeError(name) + try: + return cls._member_map_[name] + except KeyError: + raise AttributeError(name) from None + + def __getitem__(cls, name): + return cls._member_map_[name] + + def __iter__(cls): + return (cls._member_map_[name] for name in cls._member_names_) + + def __len__(cls): + return len(cls._member_names_) + + @property + def __members__(cls): + """Returns a mapping of member name->value. + + This mapping lists all enum members, including aliases. Note that this + is a read-only view of the internal mapping. + + """ + return MappingProxyType(cls._member_map_) + + def __repr__(cls): + return "" % cls.__name__ + + def __reversed__(cls): + return (cls._member_map_[name] for name in reversed(cls._member_names_)) + + def __setattr__(cls, name, value): + """Block attempts to reassign Enum members. + + A simple assignment to the class namespace only changes one of the + several possible ways to get an Enum member from the Enum class, + resulting in an inconsistent Enumeration. + + """ + member_map = cls.__dict__.get("_member_map_", {}) + if name in member_map: + raise AttributeError("Cannot reassign members.") + super().__setattr__(name, value) + + def _create_( + cls, class_name, names=None, *, module=None, qualname=None, type=None, start=1 + ): + """Convenience method to create a new Enum class. 
+ + `names` can be: + + * A string containing member names, separated either with spaces or + commas. Values are incremented by 1 from `start`. + * An iterable of member names. Values are incremented by 1 from `start`. + * An iterable of (member name, value) pairs. + * A mapping of member name -> value pairs. + + """ + metacls = cls.__class__ + bases = (cls,) if type is None else (type, cls) + _, first_enum = cls._get_mixins_(bases) + classdict = metacls.__prepare__(class_name, bases) + + # special processing needed for names? + if isinstance(names, str): + names = names.replace(",", " ").split() + if isinstance(names, (tuple, list)) and names and isinstance(names[0], str): + original_names, names = names, [] + last_values = [] + for count, name in enumerate(original_names): + value = first_enum._generate_next_value_( + name, start, count, last_values[:] + ) + last_values.append(value) + names.append((name, value)) + + # Here, names is either an iterable of (name, value) or a mapping. + for item in names: + if isinstance(item, str): + member_name, member_value = item, names[item] + else: + member_name, member_value = item + classdict[member_name] = member_value + enum_class = metacls.__new__(metacls, class_name, bases, classdict) + + # TODO: replace the frame hack if a blessed way to know the calling + # module is ever developed + if module is None: + try: + module = sys._getframe(2).f_globals["__name__"] + except (AttributeError, ValueError) as exc: + pass + if module is None: + _make_class_unpicklable(enum_class) + else: + enum_class.__module__ = module + if qualname is not None: + enum_class.__qualname__ = qualname + + return enum_class + + @staticmethod + def _get_mixins_(bases): + """Returns the type for creating enum members, and the first inherited + enum class. + + bases: the tuple of bases that was given to __new__ + + """ + if not bases: + return object, Enum + + # double check that we are not subclassing a class with existing + # enumeration members; while we're at it, see if any other data + # type has been mixed in so we can use the correct __new__ + member_type = first_enum = None + for base in bases: + if base is not Enum and issubclass(base, Enum) and base._member_names_: + raise TypeError("Cannot extend enumerations") + # base is now the last base in bases + if not issubclass(base, Enum): + raise TypeError( + "new enumerations must be created as " + "`ClassName([mixin_type,] enum_type)`" + ) + + # get correct mix-in type (either mix-in type of Enum subclass, or + # first base if last base is Enum) + if not issubclass(bases[0], Enum): + member_type = bases[0] # first data type + first_enum = bases[-1] # enum type + else: + for base in bases[0].__mro__: + # most common: (IntEnum, int, Enum, object) + # possible: (, , + # , , + # ) + if issubclass(base, Enum): + if first_enum is None: + first_enum = base + else: + if member_type is None: + member_type = base + + return member_type, first_enum + + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __new_member__ + __new__ = classdict.get("__new__", None) + + # should __new__ be saved as __new_member__ later? 
+ save_new = __new__ is not None + + if __new__ is None: + # check all possibles for __new_member__ before falling back to + # __new__ + for method in ("__new_member__", "__new__"): + for possible in (member_type, first_enum): + target = getattr(possible, method, None) + if target not in { + None, + None.__new__, + object.__new__, + Enum.__new__, + }: + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, save_new, use_args + + +class Enum(metaclass=EnumMeta): + """Generic enumeration. + + Derive from this class to define new enumerations. + + """ + + def __new__(cls, value): + # all enum instances are actually created during class construction + # without calling this method; this method is called by the metaclass' + # __call__ (i.e. Color(3) ), and by pickle + if type(value) is cls: + # For lookups like Color(Color.RED) + return value + # by-value search for a matching enum member + # see if it's in the reverse mapping (for hashable values) + try: + if value in cls._value2member_map_: + return cls._value2member_map_[value] + except TypeError: + # not there, now do long search -- O(n) behavior + for member in cls._member_map_.values(): + if member._value_ == value: + return member + # still not found -- try _missing_ hook + return cls._missing_(value) + + def _generate_next_value_(name, start, count, last_values): + for last_value in reversed(last_values): + try: + return last_value + 1 + except TypeError: + pass + else: + return start + + @classmethod + def _missing_(cls, value): + raise ValueError("%r is not a valid %s" % (value, cls.__name__)) + + def __repr__(self): + return "<%s.%s: %r>" % (self.__class__.__name__, self._name_, self._value_) + + def __str__(self): + return "%s.%s" % (self.__class__.__name__, self._name_) + + def __dir__(self): + added_behavior = [ + m + for cls in self.__class__.mro() + for m in cls.__dict__ + if m[0] != "_" and m not in self._member_map_ + ] + return ["__class__", "__doc__", "__module__"] + added_behavior + + def __format__(self, format_spec): + # mixed-in Enums should use the mixed-in type's __format__, otherwise + # we can get strange results with the Enum name showing up instead of + # the value + + # pure Enum branch + if self._member_type_ is object: + cls = str + val = str(self) + # mix-in branch + else: + cls = self._member_type_ + val = self._value_ + return cls.__format__(val, format_spec) + + def __hash__(self): + return hash(self._name_) + + def __reduce_ex__(self, proto): + return self.__class__, (self._value_,) + + # DynamicClassAttribute is used to provide access to the `name` and + # `value` properties of enum members while keeping some measure of + # protection from modification, while still allowing for an enumeration + # to have members named `name` and `value`. This works because enumeration + # members are not set directly on the enum class -- __getattr__ is + # used to look them up. 
+ + @DynamicClassAttribute + def name(self): + """The name of the Enum member.""" + return self._name_ + + @DynamicClassAttribute + def value(self): + """The value of the Enum member.""" + return self._value_ + + @classmethod + def _convert(cls, name, module, filter, source=None): + """ + Create a new Enum subclass that replaces a collection of global constants + """ + # convert all constants from source (or module) that pass filter() to + # a new Enum called name, and export the enum and its members back to + # module; + # also, replace the __reduce_ex__ method so unpickling works in + # previous Python versions + module_globals = vars(sys.modules[module]) + if source: + source = vars(source) + else: + source = module_globals + # We use an OrderedDict of sorted source keys so that the + # _value2member_map is populated in the same order every time + # for a consistent reverse mapping of number to name when there + # are multiple names for the same number rather than varying + # between runs due to hash randomization of the module dictionary. + members = [(name, source[name]) for name in source.keys() if filter(name)] + try: + # sort by value + members.sort(key=lambda t: (t[1], t[0])) + except TypeError: + # unless some values aren't comparable, in which case sort by name + members.sort(key=lambda t: t[0]) + cls = cls(name, members, module=module) + cls.__reduce_ex__ = _reduce_ex_by_name + module_globals.update(cls.__members__) + module_globals[name] = cls + return cls + + +class IntEnum(int, Enum): + """Enum where members are also (and must be) ints""" + + +def _reduce_ex_by_name(self, proto): + return self.name + + +class Flag(Enum): + """Support for flags""" + + def _generate_next_value_(name, start, count, last_values): + """ + Generate the next value when not given. + + name: the name of the member + start: the initital start value or None + count: the number of existing members + last_value: the last value assigned or None + """ + if not count: + return start if start is not None else 1 + for last_value in reversed(last_values): + try: + high_bit = _high_bit(last_value) + break + except Exception: + raise TypeError("Invalid Flag value: %r" % last_value) from None + return 2 ** (high_bit + 1) + + @classmethod + def _missing_(cls, value): + original_value = value + if value < 0: + value = ~value + possible_member = cls._create_pseudo_member_(value) + if original_value < 0: + possible_member = ~possible_member + return possible_member + + @classmethod + def _create_pseudo_member_(cls, value): + """ + Create a composite member iff value contains only members. 
+ """ + pseudo_member = cls._value2member_map_.get(value, None) + if pseudo_member is None: + # verify all bits are accounted for + _, extra_flags = _decompose(cls, value) + if extra_flags: + raise ValueError("%r is not a valid %s" % (value, cls.__name__)) + # construct a singleton enum pseudo-member + pseudo_member = object.__new__(cls) + pseudo_member._name_ = None + pseudo_member._value_ = value + # use setdefault in case another thread already created a composite + # with this value + pseudo_member = cls._value2member_map_.setdefault(value, pseudo_member) + return pseudo_member + + def __contains__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return other._value_ & self._value_ == other._value_ + + def __repr__(self): + cls = self.__class__ + if self._name_ is not None: + return "<%s.%s: %r>" % (cls.__name__, self._name_, self._value_) + members, uncovered = _decompose(cls, self._value_) + return "<%s.%s: %r>" % ( + cls.__name__, + "|".join([str(m._name_ or m._value_) for m in members]), + self._value_, + ) + + def __str__(self): + cls = self.__class__ + if self._name_ is not None: + return "%s.%s" % (cls.__name__, self._name_) + members, uncovered = _decompose(cls, self._value_) + if len(members) == 1 and members[0]._name_ is None: + return "%s.%r" % (cls.__name__, members[0]._value_) + else: + return "%s.%s" % ( + cls.__name__, + "|".join([str(m._name_ or m._value_) for m in members]), + ) + + def __bool__(self): + return bool(self._value_) + + def __or__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.__class__(self._value_ | other._value_) + + def __and__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.__class__(self._value_ & other._value_) + + def __xor__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.__class__(self._value_ ^ other._value_) + + def __invert__(self): + members, uncovered = _decompose(self.__class__, self._value_) + inverted_members = [ + m + for m in self.__class__ + if m not in members and not m._value_ & self._value_ + ] + inverted = reduce(_or_, inverted_members, self.__class__(0)) + return self.__class__(inverted) + + +class IntFlag(int, Flag): + """Support for integer-based Flags""" + + @classmethod + def _missing_(cls, value): + if not isinstance(value, int): + raise ValueError("%r is not a valid %s" % (value, cls.__name__)) + new_member = cls._create_pseudo_member_(value) + return new_member + + @classmethod + def _create_pseudo_member_(cls, value): + pseudo_member = cls._value2member_map_.get(value, None) + if pseudo_member is None: + need_to_create = [value] + # get unaccounted for bits + _, extra_flags = _decompose(cls, value) + # timer = 10 + while extra_flags: + # timer -= 1 + bit = _high_bit(extra_flags) + flag_value = 2 ** bit + if ( + flag_value not in cls._value2member_map_ + and flag_value not in need_to_create + ): + need_to_create.append(flag_value) + if extra_flags == -flag_value: + extra_flags = 0 + else: + extra_flags ^= flag_value + for value in reversed(need_to_create): + # construct singleton pseudo-members + pseudo_member = int.__new__(cls, value) + pseudo_member._name_ = None + pseudo_member._value_ = value + # use setdefault in case another thread already created a composite + # with this value + pseudo_member = cls._value2member_map_.setdefault(value, pseudo_member) + return pseudo_member + + def __or__(self, other): + if not isinstance(other, 
(self.__class__, int)): + return NotImplemented + result = self.__class__(self._value_ | self.__class__(other)._value_) + return result + + def __and__(self, other): + if not isinstance(other, (self.__class__, int)): + return NotImplemented + return self.__class__(self._value_ & self.__class__(other)._value_) + + def __xor__(self, other): + if not isinstance(other, (self.__class__, int)): + return NotImplemented + return self.__class__(self._value_ ^ self.__class__(other)._value_) + + __ror__ = __or__ + __rand__ = __and__ + __rxor__ = __xor__ + + def __invert__(self): + result = self.__class__(~self._value_) + return result + + +def _high_bit(value): + """returns index of highest bit, or -1 if value is zero or negative""" + return value.bit_length() - 1 + + +def unique(enumeration): + """Class decorator for enumerations ensuring unique member values.""" + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + alias_details = ", ".join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + ) + raise ValueError( + "duplicate values found in %r: %s" % (enumeration, alias_details) + ) + return enumeration + + +def _decompose(flag, value): + """Extract all members from the value.""" + # _decompose is only called if the value is not named + not_covered = value + negative = value < 0 + # issue29167: wrap accesses to _value2member_map_ in a list to avoid race + # conditions between iterating over it and having more psuedo- + # members added to it + if negative: + # only check for named flags + flags_to_check = [ + (m, v) + for v, m in list(flag._value2member_map_.items()) + if m.name is not None + ] + else: + # check for named flags and powers-of-two flags + flags_to_check = [ + (m, v) + for v, m in list(flag._value2member_map_.items()) + if m.name is not None or _power_of_two(v) + ] + members = [] + for member, member_value in flags_to_check: + if member_value and member_value & value == member_value: + members.append(member) + not_covered &= ~member_value + if not members and value in flag._value2member_map_: + members.append(flag._value2member_map_[value]) + members.sort(key=lambda m: m._value_, reverse=True) + if len(members) > 1 and members[0].value == value: + # we have the breakdown, don't need the value member itself + members.pop(0) + return members, not_covered + + +def _power_of_two(value): + if value < 1: + return False + return value == 2 ** _high_bit(value) diff --git a/imperative/python/megengine/core/ops/_internal/helper.py b/imperative/python/megengine/core/ops/_internal/helper.py new file mode 100644 index 0000000000000000000000000000000000000000..52af3aa0a6499621bbf2ee2a2b329cd32ffaf6bd --- /dev/null +++ b/imperative/python/megengine/core/ops/_internal/helper.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import warnings + +from ..._imperative_rt.ops import OprAttr +from . 
import param_defs + + +def make_param(param, ptype, kwargs): + if param is not None: + if isinstance(param, ptype): + return param + + param = [param] + assert len(param) == len( + ptype.__slots__ + ), "{} needs {} params, but {} are provided".format( + ptype, len(ptype.__slots__), len(param) + ) + return ptype(*param) + + ckw = {} + for i in ptype.__slots__: + val = kwargs.pop(i, ckw) + if val is not ckw: + ckw[i] = val + return ptype(**ckw) + + +class PodOpVisitor: + __name2subclass = {} + __c = None + + name = None + param_names = [] + config = None + + def __init__(self, config, **params): + self.config = config + assert set(params) == set(self.param_names) + self.__dict__.update(params) + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) # python 3.5 does not have this + name = cls.name + if name in cls.__name2subclass: + if not issubclass(cls, cls.__name2subclass[name]): + warnings.warn("Multiple subclasses for bultin op: %s" % name) + cls.__name2subclass[name] = cls + + def to_c(self): + if self.__c: + return self.__c + op = OprAttr() + op.type = self.name + if self.config is not None: + op.config = self.config + # first 4 bytes is TAG, has to remove them currently + op.param = b"".join(self.__dict__[k].serialize()[4:] for k in self.param_names) + self.__c = op + return op + + def __eq__(self, rhs): + return self.to_c() == rhs.to_c() + + def __repr__(self): + name = self.__class__.__name__ + + if self.__c: + return "{}()".format(name) + + kwargs = {} + for i in self.param_names: + p = self.__dict__[i] + if isinstance(p, param_defs._ParamDefBase): + for k in p.__slots__: + v = getattr(p, k) + if isinstance(v, param_defs._EnumBase): + v = v.name + kwargs[k] = repr(v) + else: + kwargs[i] = repr(p) + if self.config: + if len(self.config.comp_node_arr) == 1: + kwargs["device"] = "'%s'" % self.config.comp_node + return "{}({})".format( + name, ", ".join("{}={}".format(k, v) for k, v in kwargs.items()) + ) diff --git a/imperative/python/megengine/core/ops/_internal/misc_ops.py b/imperative/python/megengine/core/ops/_internal/misc_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..e02ddee95c8a693df7f39cbc492f1152ebd27bcd --- /dev/null +++ b/imperative/python/megengine/core/ops/_internal/misc_ops.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import collections +import ctypes + +from ..._imperative_rt import OperatorNodeConfig as Config +from . 
import param_defs +from .helper import PodOpVisitor, make_param + +__all__ = ["ConvolutionBackwardData", "Dimshuffle", "Reshape", "AxisAddRemove"] + + +class TensorShape: + MAX_NDIM = 7 + + +class ConvolutionBackwardData(PodOpVisitor): + param_names = ( + "param", + "execution_polity", + ) + name = "ConvolutionBackwardDataV1" + + def __init__( + self, + *, + param=None, + execution_polity=None, + name=None, + comp_node=None, + config=None, + dtype=None, + **kwargs + ): + config = config or Config() + if name: + config.name = name + if comp_node: + config.comp_node = comp_node + if dtype: + config.dtype = dtype + self.config = config + + self.param = make_param(param, param_defs.Convolution, kwargs) + self.execution_polity = make_param( + execution_polity, param_defs.ExecutionPolicy, kwargs + ) + assert not kwargs, "extra kwargs: {}".format(kwargs) + + +class Dimshuffle(PodOpVisitor): + name = "Dimshuffle" + param_names = ("pattern",) + + class Pattern(ctypes.Structure): + Pattern_Array = ctypes.c_int32 * TensorShape.MAX_NDIM + _fields_ = [ + ("length", ctypes.c_uint32), + ("pattern", Pattern_Array), + ("ndim", ctypes.c_uint32), + ] + + def serialize(self): + return bytes(ctypes.c_uint32(0)) + bytes(self) + + def __init__(self, pattern, ndim=0): + assert isinstance(pattern, collections.Iterable) + assert len(pattern) <= TensorShape.MAX_NDIM + pattern_array = Dimshuffle.Pattern.Pattern_Array() + for idx, v in enumerate(pattern): + pattern_array[idx] = ctypes.c_int32(-1 if v == "x" else int(v)) + self.pattern = Dimshuffle.Pattern(len(pattern), pattern_array, ndim) + + +class Reshape(PodOpVisitor): + name = "ReshapeV1" + param_names = ("unspec_axis",) + + def __init__(self, unspec_axis=None): + if unspec_axis is None: + self.unspec_axis = param_defs.OptionalAxisV1() + else: + self.unspec_axis = param_defs.OptionalAxisV1(unspec_axis) + + +class AxisNum(ctypes.Structure): + _fields_ = [ + ("m_num", ctypes.c_int), + ] + + +class AxisDesc(ctypes.Structure): + class Method(ctypes.c_int): + ADD_1 = 0 + REMOVE = 1 + + _fields_ = [ + ("method", Method), + ("axis", AxisNum), + ] + + @classmethod + def make_add(cls, axis): + return cls(cls.Method.ADD_1, AxisNum(axis)) + + @classmethod + def make_remove(cls, axis): + return cls(cls.Method.REMOVE, AxisNum(axis)) + + +class AxisAddRemove(PodOpVisitor): + name = "AxisAddRemove" + param_names = ("param",) + + AxisDesc = AxisDesc + + class Param(ctypes.Structure): + MAX_DESC_SIZE = TensorShape.MAX_NDIM * 2 + + _fields_ = [("nr_desc", ctypes.c_uint32), ("desc", AxisDesc * MAX_DESC_SIZE)] + + def __init__(self, *args): + super().__init__() + self.nr_desc = len(args) + for i, a in enumerate(args): + self.desc[i] = a + + def serialize(self): + return bytes(ctypes.c_uint32(0)) + bytes(self) + + def __init__(self, param): + assert isinstance(param, self.Param) + self.param = param + + +del AxisDesc + + +class IndexingOpBase(PodOpVisitor): + param_names = ("index_desc",) + + class IndexDescMaskDump(ctypes.Structure): + class Item(ctypes.Structure): + _fields_ = [ + ("axis", ctypes.c_int8), + ("begin", ctypes.c_bool), + ("end", ctypes.c_bool), + ("step", ctypes.c_bool), + ("idx", ctypes.c_bool), + ] + + Item_Array = Item * TensorShape.MAX_NDIM + + _fields_ = [("nr_item", ctypes.c_uint8), ("items", Item_Array)] + + def serialize(self): + return bytes(ctypes.c_uint32(0)) + bytes(self) + + def __init__(self, items): + nr_item = len(items) + assert nr_item <= TensorShape.MAX_NDIM + item_array = IndexingOpBase.IndexDescMaskDump.Item_Array() + for idx, item in 
enumerate(items): + assert isinstance(item, (tuple, list)) and len(item) == 5 + item_array[idx] = IndexingOpBase.IndexDescMaskDump.Item(*item) + self.index_desc = IndexingOpBase.IndexDescMaskDump(nr_item, item_array) + + +def _gen_indexing_defs(*names): + for name in names: + globals()[name] = type(name, (IndexingOpBase,), dict(name=name)) + __all__.append(name) + + +_gen_indexing_defs( + "Subtensor", + "SetSubtensor", + "IncrSubtensor", + "IndexingMultiAxisVec", + "IndexingSetMultiAxisVec", + "IndexingIncrMultiAxisVec", + "MeshIndexing", + "IncrMeshIndexing", + "SetMeshIndexing", + "BatchedMeshIndexing", + "BatchedIncrMeshIndexing", + "BatchedSetMeshIndexing", +) diff --git a/imperative/python/megengine/core/ops/builtin/__init__.py b/imperative/python/megengine/core/ops/builtin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4656cbd2890bed2a80e50b54470f1c5f63357267 --- /dev/null +++ b/imperative/python/megengine/core/ops/builtin/__init__.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import warnings +from typing import Union + +from ..._imperative_rt import OpDef, ops +from ...tensor.core import OpBase, TensorBase, TensorWrapperBase, apply +from .._internal import all_ops +from .._internal.helper import PodOpVisitor + +# register OpDef as a "virtual subclass" of OpBase, so any of registered +# apply(OpBase, ...) rules could work well on OpDef +OpBase.register(OpDef) + +# forward to apply(OpDef, ...) +@apply.add +def _(op: PodOpVisitor, *args: Union[TensorBase, TensorWrapperBase]): + return apply(op.to_c(), *args) + + +__all__ = ["OpDef", "PodOpVisitor"] + +for k, v in all_ops.__dict__.items(): + if isinstance(v, type) and issubclass(v, PodOpVisitor): + globals()[k] = v + __all__.append(k) + +for k, v in ops.__dict__.items(): + if isinstance(v, type) and issubclass(v, OpDef): + globals()[k] = v + __all__.append(k) diff --git a/imperative/python/megengine/core/ops/special.py b/imperative/python/megengine/core/ops/special.py new file mode 100644 index 0000000000000000000000000000000000000000..e427c8f592bd07ce6ec4ee248137f097fe890b00 --- /dev/null +++ b/imperative/python/megengine/core/ops/special.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
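For reference, the `_gen_indexing_defs` helper above relies on nothing more than `type()` to stamp out one `IndexingOpBase` subclass per op name. A minimal standalone sketch of that pattern (the `Base` class and names here are placeholders, not MegEngine API):

```python
# Sketch of the dynamic-class pattern used by _gen_indexing_defs: each op
# name becomes a subclass created with type(). Base stands in for
# IndexingOpBase; nothing here imports MegEngine.
class Base:
    name = None

    def __repr__(self):
        return "{}()".format(self.name)


def gen_defs(namespace, *names):
    for n in names:
        # type(name, bases, attrs) builds the class object at runtime
        namespace[n] = type(n, (Base,), dict(name=n))


ns = {}
gen_defs(ns, "Subtensor", "SetSubtensor")
assert ns["Subtensor"]().name == "Subtensor"
print(ns["Subtensor"]())  # Subtensor()
```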
+from ..tensor.core import OpBase, TensorBase, apply + + +class Const(OpBase): + def __init__(self, value=None, *, dtype=None, device=None): + self.value = value + self.dtype = dtype + self.device = device diff --git a/imperative/python/megengine/core/tensor/__init__.py b/imperative/python/megengine/core/tensor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e008c110f3e4280d7ac40b666c80a6b38f89a940 --- /dev/null +++ b/imperative/python/megengine/core/tensor/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .tensor_wrapper import TensorWrapper as Tensor diff --git a/imperative/python/megengine/core/tensor/core.py b/imperative/python/megengine/core/tensor/core.py new file mode 100644 index 0000000000000000000000000000000000000000..3a09f5246fc7acf68670a68b9fb1d06dafe7feb5 --- /dev/null +++ b/imperative/python/megengine/core/tensor/core.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import collections +import functools +import inspect +import sys +import typing +from abc import ABC + +import multipledispatch + + +class OpBase(ABC): + def __call__(self, *args): + return apply(self, *args) + + +class TensorBase: + pass + + +class TensorWrapperBase: + pass + + +class Dispatcher(multipledispatch.Dispatcher): + def add(self, f, g=None): + if g is None: + super().add(get_signature(f), f) + else: + super().add(f, g) + + return f + + def __get__(self, instance, owner=None): + if instance is not None: + return self + return functools.partial(self, instance) + + +if sys.version_info < (3, 6): + + def parse_union(ann): + if type(ann) is not typing.UnionMeta: + return + return ann.__union_params__ + + +elif sys.version_info < (3, 7): + + def parse_union(ann): + if type(ann) is not typing._Union: + return + return ann.__args__ + + +elif sys.version_info < (3, 8): + + def parse_union(ann): + if type(ann) is not typing._GenericAlias: + if type(ann) is not typing.Union: + return + else: + if ann.__origin__ is not typing.Union: + return + return ann.__args__ + + +else: + + def parse_union(ann): + if typing.get_origin(ann) is not typing.Union: + return + return typing.get_args(ann) + + +def get_signature(function, op_type=None): + sig = inspect.signature(function) + types = [] + for p in sig.parameters.values(): + ann = p.annotation + ann = parse_union(ann) or ann + if p.kind in ( + inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + ): + types.append(ann) + if p.kind == inspect.Parameter.VAR_POSITIONAL: + types.append([ann]) + return tuple(types) + + +apply = Dispatcher("apply") + +OpBase.apply = apply + + +@apply.add +def _(op: OpBase, *args: TensorBase): + raise NotImplementedError + + +@apply.add +def _(op: OpBase, *args: TensorWrapperBase): + assert args + Wrapper = type(args[0]) + outputs = apply(op, *(i.__wrapped__ for i in 
args)) + assert isinstance(outputs, tuple) + return tuple(map(Wrapper, outputs)) diff --git a/imperative/python/megengine/core/tensor/dtype.py b/imperative/python/megengine/core/tensor/dtype.py new file mode 100644 index 0000000000000000000000000000000000000000..85c22bb7a7258b170cd90c700363e18bce6170f6 --- /dev/null +++ b/imperative/python/megengine/core/tensor/dtype.py @@ -0,0 +1,289 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +import collections +from typing import Union + +import numpy as np + +# normal dtype related +from .._imperative_rt import bfloat16, intb1, intb2, intb4 + + +def is_lowbit(dtype): + return (dtype is intb1) or (dtype is intb2) or (dtype is intb4) + + +def is_bfloat16(dtype): + return dtype is bfloat16 + + +# quantization dtype related +_QuantDtypeMetadata = collections.namedtuple( + "QuantDtypeMetadata", ["name", "np_dtype_str", "is_unsigned", "qmin", "qmax",] +) + +_metadata_dict = { + "quint8": _QuantDtypeMetadata("Quantized8Asymm", "uint8", True, 0, 255), + "qint8": _QuantDtypeMetadata("QuantizedS8", "int8", False, -128, 127), + "quint4": _QuantDtypeMetadata("Quantized4Asymm", "uint8", True, 0, 15), + "qint4": _QuantDtypeMetadata("QuantizedS4", "int8", False, -8, 7), + "qint32": _QuantDtypeMetadata( + "QuantizedS32", "int32", False, -(2 ** 31), 2 ** 31 - 1, + ), + # NOTE: int2 is not supported for model dump yet + "quint2": _QuantDtypeMetadata(None, "uint8", True, 0, 3), + "qint2": _QuantDtypeMetadata(None, "int8", False, -2, 1), +} + + +def is_quantize(dtype): + return ( + hasattr(dtype, "metadata") + and dtype.metadata is not None + and "mgb_dtype" in dtype.metadata + ) + + +def get_scale(dtype): + assert is_quantize(dtype) + return dtype.metadata["mgb_dtype"]["scale"] + + +def get_zero_point(dtype): + assert is_quantize(dtype) + metadata = dtype.metadata["mgb_dtype"] + assert metadata["name"] in ("Quantized8Asymm", "Quantized4Asymm") + return metadata["zero_point"] + + +def _check_zero_point(zp: int, dtype_str: str): + qmin = _metadata_dict[dtype_str].qmin + qmax = _metadata_dict[dtype_str].qmax + if zp < qmin or zp > qmax: + raise ValueError( + "zero_point should be within [{}, {}] for {}".format(qmin, qmax, dtype_str) + ) + + +def get_quantized_dtype(dtype_str: str, scale: float, zp: Union[int, None]): + r""" + Get quantized dtype with metadata attribute according to _metadata_dict. + + Note that unsigned dtype must have ``zero_point`` and signed dtype must + not have ``zero_point``, to be consitent with tensor generated by calling + compiled function from `CompGraph.compile(inputs, outspec)`. 
+
+    :param dtype_str: a string indicating which dtype to return
+    :param scale: a number for scale to store in dtype's metadata
+    :param zp: a number for zero_point to store in dtype's metadata
+    """
+    metadata = _metadata_dict[dtype_str]
+    np_dtype_str = metadata.np_dtype_str
+    is_unsigned = metadata.is_unsigned
+    if is_unsigned:
+        if zp is None or int(zp) != zp:
+            raise ValueError("zero_point should be an integer")
+        zp = int(zp)
+        _check_zero_point(zp, dtype_str)
+        return np.dtype(
+            np_dtype_str,
+            metadata={
+                "mgb_dtype": {
+                    "name": metadata.name,
+                    "scale": float(scale),
+                    "zero_point": zp,
+                }
+            },
+        )
+    else:
+        return np.dtype(
+            np_dtype_str,
+            metadata={"mgb_dtype": {"name": metadata.name, "scale": float(scale)}},
+        )
+
+
+def quint8(scale, zero_point):
+    """
+    Construct a quantized unsigned int8 data type with ``scale`` (float) and
+    ``zero_point`` (uint8). The real value represented by a quint8 data type is
+    float_val = scale * (uint8_val - zero_point)
+    """
+    return get_quantized_dtype("quint8", scale, zero_point)
+
+
+def qint8(scale):
+    """
+    Construct a quantized int8 data type with ``scale`` (float). The real value
+    represented by a qint8 data type is float_val = scale * int8_val
+    """
+    return get_quantized_dtype("qint8", scale, None)
+
+
+def qint32(scale):
+    """
+    Construct a quantized int32 data type with ``scale`` (float). The real value
+    represented by a qint32 data type is float_val = scale * int32_val
+    """
+    return get_quantized_dtype("qint32", scale, None)
+
+
+def quint4(scale, zero_point):
+    """
+    Construct a quantized unsigned int4 data type with ``scale`` (float) and
+    ``zero_point`` (uint8). The real value represented by a quint4 data type is
+    float_val = scale * (uint4_val - zero_point)
+    """
+    return get_quantized_dtype("quint4", scale, zero_point)
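The carrier for all of this is NumPy's dtype `metadata` mapping, which `get_quantized_dtype` attaches at construction time. A short self-contained sketch of the mechanism, with hypothetical scale and zero-point values:

```python
import numpy as np

# NumPy dtypes can carry an immutable `metadata` mapping; this is the
# mechanism get_quantized_dtype relies on to tag a plain uint8/int8 dtype
# with quantization parameters. Values below are illustrative only.
dt = np.dtype(
    "uint8",
    metadata={"mgb_dtype": {"name": "Quantized8Asymm",
                            "scale": 0.1,
                            "zero_point": 128}},
)
assert dt.metadata["mgb_dtype"]["scale"] == 0.1

# the real value represented by a raw value q is scale * (q - zero_point)
q = np.uint8(138)
print(0.1 * (int(q) - 128))  # 1.0
```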
+def qint4(scale):
+    """
+    Construct a quantized int4 data type with ``scale`` (float). The real value
+    represented by a qint4 data type is float_val = scale * int4_val
+    """
+    return get_quantized_dtype("qint4", scale, None)
+
+
+def _convert_to_quantized_dtype(arr: np.ndarray, dtype: np.dtype, dtype_str: str):
+    metadata = _metadata_dict[dtype_str]
+    arr_metadata = dtype.metadata["mgb_dtype"]
+    if not isinstance(arr, np.ndarray):
+        raise ValueError("arr parameter should be instance of np.ndarray")
+    if not is_quantize(dtype) or arr_metadata["name"] != metadata.name:
+        raise ValueError("dtype parameter should be a {} dtype".format(dtype_str))
+    is_unsigned = metadata.is_unsigned
+    if is_unsigned:
+        scale, zp = (
+            arr_metadata["scale"],
+            arr_metadata["zero_point"],
+        )
+        return (
+            (np.round(arr / scale) + zp)
+            .clip(metadata.qmin, metadata.qmax)
+            .astype(dtype)
+        )
+    else:
+        # don't try to combine this with the is_unsigned branch; see
+        # ``get_quantized_dtype``
+        scale = arr_metadata["scale"]
+        return np.round(arr / scale).clip(metadata.qmin, metadata.qmax).astype(dtype)
+
+
+def _convert_from_quantized_dtype(arr: np.ndarray, dtype_str: str):
+    metadata = _metadata_dict[dtype_str]
+    arr_metadata = arr.dtype.metadata["mgb_dtype"]
+    if not isinstance(arr, np.ndarray):
+        raise ValueError("arr parameter should be instance of np.ndarray")
+    if not is_quantize(arr.dtype) or arr_metadata["name"] != metadata.name:
+        raise ValueError("arr's dtype should be a {} dtype".format(dtype_str))
+    is_unsigned = metadata.is_unsigned
+    if is_unsigned:
+        scale, zp = (
+            arr_metadata["scale"],
+            arr_metadata["zero_point"],
+        )
+        return (arr.astype(np.float32) - zp) * scale
+    else:
+        # don't try to combine this with the is_unsigned branch; see
+        # ``get_quantized_dtype``
+        scale = arr_metadata["scale"]
+        return (arr.astype(np.float32)) * scale
+
+
+def convert_to_quint8(arr: np.ndarray, q: np.dtype):
+    """
+    Quantize a float NumPy ndarray into a quint8 one with specified params.
+
+    :param arr: Input ndarray.
+    :param q: Target data type, should be a quint8.
+    """
+    return _convert_to_quantized_dtype(arr, q, "quint8")
+
+
+def convert_from_quint8(arr: np.ndarray):
+    """
+    Dequantize a quint8 NumPy ndarray into a float one.
+
+    :param arr: Input ndarray.
+    """
+    return _convert_from_quantized_dtype(arr, "quint8")
+
+
+def convert_to_qint8(arr: np.ndarray, q: np.dtype):
+    """
+    Quantize a float NumPy ndarray into a qint8 one with specified params.
+
+    :param arr: Input ndarray.
+    :param q: Target data type, should be a qint8.
+    """
+    return _convert_to_quantized_dtype(arr, q, "qint8")
+
+
+def convert_from_qint8(arr: np.ndarray):
+    """
+    Dequantize a qint8 NumPy ndarray into a float one.
+
+    :param arr: Input ndarray.
+    """
+    return _convert_from_quantized_dtype(arr, "qint8")
+
+
+def convert_to_qint32(arr: np.ndarray, q: np.dtype):
+    """
+    Quantize a float NumPy ndarray into a qint32 one with specified params.
+
+    :param arr: Input ndarray.
+    :param q: Target data type, should be a qint32.
+    """
+    return _convert_to_quantized_dtype(arr, q, "qint32")
+
+
+def convert_from_qint32(arr: np.ndarray):
+    """
+    Dequantize a qint32 NumPy ndarray into a float one.
+
+    :param arr: Input ndarray.
+    """
+    return _convert_from_quantized_dtype(arr, "qint32")
+
+
+def convert_to_quint4(arr: np.ndarray, q: np.dtype):
+    """
+    Quantize a float NumPy ndarray into a quint4 one with specified params.
+
+    :param arr: Input ndarray.
+    :param q: Target data type, should be a quint4.
+    """
+    return _convert_to_quantized_dtype(arr, q, "quint4")
+
+
+def convert_from_quint4(arr: np.ndarray):
+    """
+    Dequantize a quint4 NumPy ndarray into a float one.
+
+    :param arr: Input ndarray.
+    """
+    return _convert_from_quantized_dtype(arr, "quint4")
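The round/clip/rescale arithmetic inside `_convert_to_quantized_dtype` and `_convert_from_quantized_dtype` can be checked in isolation. A standalone sketch of the quint8 round trip, with scale and zero-point assumed for illustration:

```python
import numpy as np

# Standalone sketch of the quint8 round trip implemented above by
# convert_to_quint8 / convert_from_quint8; scale=0.5, zero_point=128
# are assumed values, not defaults.
scale, zp, qmin, qmax = 0.5, 128, 0, 255
x = np.array([-1.0, 0.0, 0.25, 100.0], dtype=np.float32)

q = (np.round(x / scale) + zp).clip(qmin, qmax).astype(np.uint8)
y = (q.astype(np.float32) - zp) * scale

print(q)  # [126 128 128 255]
print(y)  # [-1.   0.   0.  63.5] -- 100.0 saturates at qmax
```

Note how 100.0 clips to the representable range, so dequantization recovers 63.5 rather than the original value; that saturation is exactly what `clip(metadata.qmin, metadata.qmax)` encodes.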
+def convert_to_qint4(arr: np.ndarray, q: np.dtype):
+    """
+    Quantize a float NumPy ndarray into a qint4 one with specified params.
+
+    :param arr: Input ndarray.
+    :param q: Target data type, should be a qint4.
+    """
+    return _convert_to_quantized_dtype(arr, q, "qint4")
+
+
+def convert_from_qint4(arr: np.ndarray):
+    """
+    Dequantize a qint4 NumPy ndarray into a float one.
+
+    :param arr: Input ndarray.
+    """
+    return _convert_from_quantized_dtype(arr, "qint4")
diff --git a/imperative/python/megengine/core/tensor/function.py b/imperative/python/megengine/core/tensor/function.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cbb3d56e2ba9fb91a15c64d24ab7873eaffbc05
--- /dev/null
+++ b/imperative/python/megengine/core/tensor/function.py
@@ -0,0 +1,158 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from ..ops.builtin import OpDef
+from .core import TensorBase, TensorWrapperBase, apply
+from .raw_tensor import RawTensor
+from .tensor import Tensor, push_context
+from .tensor_wrapper import TensorWrapper
+
+
+class Function:
+    """
+    Defines a block of operations with customizable differentiation.
+
+    The computation should be defined in the ``forward`` method, with gradient
+    computation defined in the ``backward`` method.
+
+    Each instance of ``Function`` should be used only once during the forward pass.
+
+    Examples:
+
+    .. testcode::
+
+        class Sigmoid(Function):
+            def forward(self, x):
+                y = 1 / (1 + F.exp(-x))
+                self.y = y
+                return y
+
+            def backward(self, output_grads):
+                y = self.y
+                return output_grads * y * (1 - y)
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def __call__(self, *args):
+        ret = apply(self, *args)
+        if type(ret) == tuple and len(ret) == 1:
+            return ret[0]
+        return ret
+
+    def forward(self, *args, **kwargs):
+        """
+        Applies operations to ``inputs`` and returns results. It must be overridden by all subclasses.
+
+        :param input: Input tensors.
+        :return: A tuple of Tensor or a single Tensor.
+
+        .. note::
+
+            This method should return a tuple of Tensor or a single Tensor representing the output
+            of the function.
+        """
+        raise NotImplementedError
+
+    def backward(self, *output_grads):
+        """
+        Computes the gradient of the forward function. It must be overridden by all subclasses.
+
+        :param output_grads: gradients of outputs that are returned by :meth:`~.function.Function.forward`
+
+        .. note::
+
+            In case some tensors of outputs are not related to the loss function, the corresponding
+            values in ``output_grads`` would be ``None``.
+
+        .. note::
+
+            This method should return a tuple containing the gradients of all inputs, in the same order
+            as the ``inputs`` argument of :meth:`~.function.Function.forward` . A ``Tensor`` could be returned
+            instead if there is only one input. If users want to stop the propagation of some gradients,
+            the corresponding returned values should be set to ``None`` .
+ + """ + raise NotImplementedError + + def get_backward_fn(self): + if self.backward is None: + return None + + def _backward(*output_grads): + if type(output_grads) is tuple: + _output_grads = map(TensorWrapper, output_grads) + else: + _output_grads = (TensorWrapper(output_grads),) + ret = self.backward(*_output_grads) + if type(ret) is not tuple: + ret = (ret,) + ret = tuple([i.__wrapped__ for i in ret]) + return ret + + return _backward + + +Function.apply = Function.__call__ + + +@apply.add +def _(op: Function, *args: TensorWrapperBase): + assert args + Wrapper = type(args[0]) + + # compute the value for self define function + extra_data_dic = {} + for arg in args: + extra_data_dic[arg.__wrapped__] = arg.__wrapped__._extra_data + arg.__wrapped__._extra_data = {} + + rets = op.forward(*args) + + for arg in args: + arg.__wrapped__._extra_data = extra_data_dic[arg.__wrapped__] + + # update the gradient information for self define function + inputs = tuple(map(lambda i: i.__wrapped__, args)) + outputs = ( + tuple(map(lambda i: i.__wrapped__, rets)) + if type(rets) is tuple + else (rets.__wrapped__,) + ) + + for output in outputs: + output._extra_data = {} + + with push_context() as ctx: + ctx.inputs = inputs + ctx.outputs = outputs + for k in set().union(*(i._extra_data for i in inputs if isinstance(i, Tensor))): + ctx.key = k + data = tuple( + i._extra_data.get(k) if isinstance(i, Tensor) else i for i in inputs + ) + # data are instances of Tracer + # dispatched to apply.add@grad.py + rets = apply(op, *data) + if rets is not None: + assert len(outputs) == len(rets) + for t, i in zip(outputs, rets): + t._extra_data[k] = i + + return tuple(map(Wrapper, outputs)) + + +@apply.add +def _(op: Function, *args: Tensor): + raise NotImplementedError + + +@apply.add +def _(op: Function, *args: RawTensor): + raise NotImplementedError diff --git a/imperative/python/megengine/core/tensor/indexing.py b/imperative/python/megengine/core/tensor/indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..2c612c9b4899c774d50423f76e2f83c577dccee8 --- /dev/null +++ b/imperative/python/megengine/core/tensor/indexing.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
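The indexing module that begins below lowers Python `__getitem__`/`__setitem__` syntax onto the `Subtensor`/`IndexingMultiAxisVec` family of ops. Each indexed axis is described by a five-flag mask (axis, has-begin, has-end, has-step, has-idx), with the concrete scalars and tensors collected into a separate argument list. A simplified standalone sketch of that encoding, with hypothetical inputs (the real `unpack_getitem` below also handles ellipsis, newaxis, and boolean masks):

```python
# Sketch of the per-axis descriptor built by unpack_getitem: each indexed
# axis yields [axis, has_begin, has_end, has_step, has_idx], and the actual
# index operands are collected separately.
def describe(index):
    items, tensors = [], []
    for axis, i in enumerate(index):
        if isinstance(i, slice):
            item = [axis,
                    i.start is not None,
                    i.stop is not None,
                    i.step is not None,
                    False]
            tensors += [v for v in (i.start, i.stop, i.step) if v is not None]
        else:  # integer index on this axis
            item = [axis, False, False, False, True]
            tensors.append(i)
        items.append(item)
    return items, tensors


print(describe((slice(1, None), 2)))
# ([[0, True, False, False, False], [1, False, False, False, True]], [1, 2])
```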
+import numpy as np + +from ..ops import builtin +from ..ops.special import Const +from .core import TensorBase, TensorWrapperBase, apply + + +def remove_ellipsis(tensor, tuple_val): + ndim_sum = tensor.ndim + cur_sum = 0 + pos = -1 + for i_idx, i in enumerate(tuple_val): + if i is Ellipsis: + for j in tuple_val[:i_idx:-1]: + if j is Ellipsis: + raise IndexError("only one ellipsis is allowed") + pos = i_idx + else: + cur_sum += i.ndim if hasattr(i, "ndim") else 1 + if pos == -1: + return tuple_val + else: + return ( + tuple_val[:pos] + + (slice(None, None, None),) * (ndim_sum - cur_sum) + + tuple_val[pos + 1 :] + ) + + +def check_bool_index(tensor, tuple_val): + cur_shape = tensor.shape + new_tuple_val = [] + offset = 0 + tdim = 0 + for idx, i in enumerate(tuple_val): + if hasattr(i, "dtype") and i.dtype == np.bool_: + if i.ndim > 1: + tot = i.ndim + for j in range(i.ndim): + if cur_shape[tdim + j - offset] != i.shape[j]: + raise IndexError( + "boolean index did not match tensor along dimension {}; dimension is {} but corresponding boolean dimension is {}".format( + tdim + j, cur_shape[tdim + j - offset], i.shape[j] + ) + ) + i = i.reshape(-1) + cur_shape = ( + cur_shape[:idx] + (i.shape[0],) + cur_shape[tdim + tot - offset :] + ) + offset += 1 + tensor = tensor.reshape(cur_shape) + tdim += tot + new_tuple_val.append(i) + else: + new_tuple_val.append(i) + tdim += 1 + return tensor, new_tuple_val + + +def unpack_getitem(inp, tuple_val, *, allow_newaxis=True): + if not isinstance(tuple_val, tuple): + tuple_val = (tuple_val,) + ndim_indexed = 0 + for i in tuple_val: + if not i is Ellipsis: + ndim_indexed += 1 if not hasattr(i, "ndim") else i.ndim + if ndim_indexed > inp.ndim: + raise IndexError( + "too many indices for tensor: tensor is {}-dimensional, but {} were indexed".format( + inp.ndim, ndim_indexed + ) + ) + + tuple_val = remove_ellipsis(inp, tuple_val) + use_subtensor = True + inp, tuple_val = check_bool_index(inp, tuple_val) + + def is_scalar(d): + if isinstance(i, int): + return True + if type(d).__module__ == np.__name__: + return np.isscalar(d) + # if isinstance(d, (TensorBase, TensorWrapperBase)): + # return d.shape == (1,) + return False + + new_axes = [] + tensors = [] + items = [] + cur_axis = -1 + for i_idx, i in enumerate(tuple_val): + cur_axis += 1 + if i is np.newaxis: + if cur_axis >= 0: + new_axes.append(cur_axis) + continue + + if i is Ellipsis: + cur_axis = -1 + for j in tuple_val[:i_idx:-1]: + if j is Ellipsis: + raise IndexError("only one ellipsis is allowed") + if j is np.newaxis: + new_axes.append(cur_axis) + cur_axis -= 1 + continue + + if ( + not is_scalar(i) + and not i is np.newaxis + and not i is Ellipsis + and not isinstance(i, slice) + ): + use_subtensor = False + + item = [ + cur_axis, + ] + + def is_bool_list(x): + if not isinstance(x, list): + return False + for i in x: + if not isinstance(i, bool): + return False + return True + + def get_index(i): + if not isinstance(i, (TensorBase, TensorWrapperBase)): + if is_bool_list(i) or isinstance(i, np.ndarray) and i.dtype == np.bool_: + (i,) = Const(i, dtype=np.bool_, device=inp.device)(inp) + else: + (i,) = Const(i, dtype=np.int32, device=inp.device)(inp) + return i + assert isinstance(i, (TensorBase, TensorWrapperBase)) + if i.dtype != np.bool_: + return i + _, ind = apply(builtin.CondTake(), i, i) + return ind + + def push(v, item, tensors): + if v is None: + item.append(False) + else: + item.append(True) + v = get_index(v) + assert np.issubdtype(v.dtype, np.integer) or np.issubdtype( + v.dtype, np.bool + ), 
"var type in the subscript must be int or bool" + tensors.append(v) + + if isinstance(i, slice): + if i.start is None and i.stop is None and i.step is None: + continue + push(i.start, item, tensors) + push(i.stop, item, tensors) + push(i.step, item, tensors) + item.append(False) # idx + else: + item += [False,] * 3 # begin, end, stop + push(i, item, tensors) + assert len(item) == 5 + items.append(item) + if new_axes: + raise IndexError("newaxis is not allowed here") + return inp, tensors, items, use_subtensor + + +def try_condtake(tensor, index): + if not hasattr(index, "dtype") or not hasattr(index, "shape"): + return [] + if index.dtype != np.bool_ or index.shape != tensor.shape: + return [] + if isinstance(index, np.ndarray): + (i,) = Const(i, dtype=np.bool_, device=inp.device)(inp) + assert isinstance(index, (TensorBase, TensorWrapperBase)) + if not isinstance(tensor, (TensorWrapperBase, TensorBase)): + raise TypeError("input must be a tensor") + if tensor.device != index.device: + raise ValueError( + "ambiguous device: {} vs {}".format(tensor.device, index.device) + ) + return apply(builtin.CondTake(), tensor, index) + + +def getitem(tensor, index): + try_result = try_condtake(tensor, index) + if len(try_result) == 2: + return try_result[0] + tensor, tensors, items, use_subtensor = unpack_getitem(tensor, index) + for v in tensors: + if v.shape[0] == 0: + (empty_tensor,) = Const([], dtype=tensor.dtype, device=tensor.device)( + tensor + ) + return empty_tensor + if use_subtensor: + op = builtin.Subtensor(items=items) + else: + op = builtin.IndexingMultiAxisVec(items=items) + (result,) = apply(op, tensor, *tensors) + return result + + +def setitem(tensor, index, value): + org_shape = tensor.shape + try_result = try_condtake(tensor, index) + if len(try_result) == 2: + index = try_result[1] + if index.shape[0] == 0: + return tensor + tensor = tensor.reshape(-1) + if not isinstance(value, (TensorBase, TensorWrapperBase)): + op = Const(value, dtype=tensor.dtype, device=tensor.device) + (value,) = op(tensor) + tensor, tensors, items, use_subtensor = unpack_getitem(tensor, index) + for v in tensors: + if v.shape[0] == 0: + return tensor + if use_subtensor: + op = builtin.Subtensor(items=items) + else: + op = builtin.IndexingMultiAxisVec(items=items) + (tmp_result,) = apply(op, tensor, *tensors) + if value.shape != tmp_result.shape: + for i in range(min(len(value.shape), len(tmp_result.shape))): + if ( + value.shape[-i - 1] != 1 + and value.shape[-i - 1] != tmp_result.shape[-i - 1] + ): + raise ValueError( + "cannot copy tensor with shape {} to subtensor with shape {}".format( + value.shape, tmp_result.shape + ) + ) + value = value.broadcast(tmp_result.shape) + if use_subtensor: + op = builtin.SetSubtensor(items=items) + else: + op = builtin.IndexingSetMultiAxisVec(items=items) + (result,) = apply(op, tensor, value, *tensors) + result = result.reshape(org_shape) + return result diff --git a/imperative/python/megengine/core/tensor/megbrain_graph.py b/imperative/python/megengine/core/tensor/megbrain_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..86f7bcc11bfff0450cca6b1bd618ae7543908e6d --- /dev/null +++ b/imperative/python/megengine/core/tensor/megbrain_graph.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import collections +import threading +import weakref +from concurrent.futures import Future, ThreadPoolExecutor + +from .. import _imperative_rt +from .._wrap import device as as_device +from ..ops.builtin import OpDef +from .core import OpBase, TensorBase, apply + + +class CompiledFunction: + def __init__(self, graph, function): + self._graph = graph + self._function = function + self._future = None + + def execute(self, *args): + assert self._future is None + self._future = self._graph._executor.submit(self._function.execute, *args) + + def wait(self): + assert self._future is not None + self._future.exception() + self._function.wait() + try: + return self._future.result() + finally: + self._future = None + + def __call__(self, *args): + self.execute(*args) + return self.wait() + + +class Graph(_imperative_rt.ComputingGraph): + def __init__(self): + super().__init__() + self._var_cache = weakref.WeakKeyDictionary() + self._op_cache = weakref.WeakKeyDictionary() + self._executor = ThreadPoolExecutor(1) + + def _wrap(self, obj): + if type(obj) is _imperative_rt.VarNode: + wrapper, cache = VarNode, self._var_cache + elif type(obj) is _imperative_rt.OperatorNode: + wrapper, cache = OpNode, self._op_cache + if obj not in cache: + cache[obj] = wrapper(obj) + return cache[obj] + + def compile(self, *args): + return CompiledFunction(self, super().compile(_unwrap(args))) + + +class VarNode(TensorBase): + def __init__(self, node: _imperative_rt.VarNode): + self._node = node + + @property + def graph(self) -> Graph: + return self._node.graph + + @property + def op(self): + return self.graph._wrap(self._node.owner) + + @property + def dtype(self): + return self._node.dtype + + @property + def device(self): + return as_device(self._node.comp_node) + + +class OpNode: + def __init__(self, node: _imperative_rt.OperatorNode): + self._node = node + + @property + def graph(self) -> Graph: + return self._node.graph + + @property + def inputs(self): + return tuple(map(self.graph._wrap, self._node.inputs)) + + @property + def outputs(self): + return tuple(map(self.graph._wrap, self._node.outputs)) + + +def _wrap(x): + if isinstance(x, collections.Sequence): + return type(x)(map(_wrap, x)) + return x.graph._wrap(x) + + +def _unwrap(x): + if isinstance(x, collections.Sequence): + return type(x)(map(_unwrap, x)) + return x._node + + +@apply.add +def _(op: OpDef, *args: VarNode): + outputs = _imperative_rt.invoke_op(op, _unwrap(args)) + return _wrap(outputs) + + +def input_callback(callback, *args, device=None, dtype=None, graph=None): + outputs = _imperative_rt.input_callback( + callback, as_device(device).to_c(), dtype, _unwrap(args), graph=graph + ) + value, dummy = _wrap(outputs) + return value, dummy + + +class InputNode(OpNode): + def __init__(self, *args: VarNode, device=None, dtype=None, graph=None): + r = _imperative_rt.DeviceTensorNDRendezvous() + if device is not None: + device = as_device(device).to_c() + outputs = _imperative_rt.input_callback( + r, device, dtype, _unwrap(args), graph=graph + ) + super().__init__(outputs[0].owner) + self._rendezvous = r + + def set_value(self, value): + assert isinstance(value, _imperative_rt.DeviceTensorND) + self._rendezvous.set(value) + + def reset(self): + self._rendezvous.reset() + + @property + def device(self): + return self.outputs[0].device + 
+ @property + def dtype(self): + return self.outputs[0].dtype + + +def output_callback(callback, var, *args): + args = (var,) + args + dummy = _imperative_rt.output_callback(callback, _unwrap(args)) + return _wrap(dummy) + + +class OutputNode(OpNode): + def __init__(self, var, *args): + args = (var,) + args + r = _imperative_rt.DeviceTensorNDRendezvous() + dummy = _imperative_rt.output_callback(r, _unwrap(args)) + super().__init__(dummy.owner) + self._rendezvous = r + + def get_value(self): + return self._rendezvous.get() + + def reset(self): + self._rendezvous.reset() + + +class TensorAttr: + def __init__(self, shape, dtype, device): + self.shape = shape + self.dtype = dtype + self.device = device + + +class AttrOutputNode(OpNode): + def __init__(self, var, *args): + args = (var,) + args + r = _imperative_rt.TensorAttrRendezvous() + dummy = _imperative_rt.attr_output_callback(r, _unwrap(args)) + super().__init__(dummy.owner) + self._rendezvous = r + + def get_value(self): + attr = self._rendezvous.get() + return TensorAttr(attr.shape, attr.dtype, as_device(attr.comp_node)) + + def reset(self): + self._rendezvous.reset() diff --git a/imperative/python/megengine/core/tensor/raw_tensor/__init__.py b/imperative/python/megengine/core/tensor/raw_tensor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..decca86df20b78a6de30341fe20353ebec60373f --- /dev/null +++ b/imperative/python/megengine/core/tensor/raw_tensor/__init__.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
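`InputNode` and `OutputNode` above both hand a `DeviceTensorNDRendezvous` to the C++ side: one end deposits a tensor, the other blocks until it arrives, and `reset` rearms the slot for the next run. A standalone Python sketch of that rendezvous pattern (not the MegEngine class, which lives in C++):

```python
import threading

# Sketch of the rendezvous hand-off used by InputNode.set_value /
# OutputNode.get_value: one side deposits a value, the other blocks
# until it arrives; reset() rearms the slot for the next iteration.
class Rendezvous:
    def __init__(self):
        self._cond = threading.Condition()
        self._value = None
        self._set = False

    def set(self, value):
        with self._cond:
            self._value, self._set = value, True
            self._cond.notify_all()

    def get(self):
        with self._cond:
            self._cond.wait_for(lambda: self._set)
            return self._value

    def reset(self):
        with self._cond:
            self._value, self._set = None, False


r = Rendezvous()
threading.Thread(target=lambda: r.set(42)).start()
print(r.get())  # 42
```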
+import functools + +import numpy as np + +from ..._imperative_rt import CompNode, DeviceTensorND +from ..._imperative_rt.imperative import ( + _get_dev_tensor, + apply_op, + delete, + get_device, + get_dtype, + get_shape, + get_value, + put, +) +from ..._wrap import device as as_device +from ...ops.builtin import Copy, OpDef, TypeCvt +from ...ops.special import Const +from ..core import OpBase, TensorBase, apply + + +class RawTensor(TensorBase): + + _init_cb = None + _del_cb = None + + def __init__(self, handle): + self._handle = handle + if self._init_cb: + self._init_cb() + + @property + def dtype(self): + return get_dtype(self._handle) + + @property + def device(self): + return as_device(get_device(self._handle)) + + @property + def shape(self): + return get_shape(self._handle) + + def numpy(self): + return get_value(self._handle) + + def _dev_tensor(self): + return _get_dev_tensor(self._handle) + + def __repr__(self): + return "{}({}, device='{}')".format( + type(self).__qualname__, repr(self.numpy()), self.device + ) + + def __del__(self): + if self._del_cb: + self._del_cb() + delete(self._handle) + + +@apply.add +def _(op: OpDef, *args: RawTensor): + outputs = apply_op(op, tuple(i._handle for i in args)) + return tuple(map(RawTensor, outputs)) + + +@apply.add +def _(op: Const, *args: RawTensor): + dtype = op.dtype + device = as_device(op.device).to_c() + return (as_raw_tensor(op.value, dtype=dtype, device=device),) + + +@functools.singledispatch +def as_raw_tensor(obj, dtype=None, device=None): + obj = np.asarray(obj, dtype=dtype) + if obj.dtype == np.float64: + obj = obj.astype(np.float32) + if obj.dtype == np.int64: + obj = obj.astype(np.int32) + return as_raw_tensor(obj, device=device) + + +@as_raw_tensor.register(np.ndarray) +def _(array: np.ndarray, dtype=None, device=None): + device = None if device is None else as_device(device).to_c() + return RawTensor(put(array, dtype=dtype, device=device)) + + +@as_raw_tensor.register(RawTensor) +def _(tensor: RawTensor, dtype=None, device=None): + if dtype is not None: + dtype = np.dtype(dtype) + if dtype != tensor.dtype: + (tensor,) = apply(TypeCvt(dtype=dtype), tensor) + if device is not None: + device = as_device(device) + if device != tensor.device: + (tensor,) = apply(Copy(comp_node=device.to_c()), tensor) + return tensor diff --git a/imperative/python/megengine/core/tensor/raw_tensor/jit.py b/imperative/python/megengine/core/tensor/raw_tensor/jit.py new file mode 100644 index 0000000000000000000000000000000000000000..091b3789d2764662d953e67caf7b28847f85de4f --- /dev/null +++ b/imperative/python/megengine/core/tensor/raw_tensor/jit.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
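The `as_raw_tensor` conversion that closes the module above is a `functools.singledispatch` function: the generic overload normalizes arbitrary objects through `np.asarray` (narrowing float64/int64, since the runtime computes in 32-bit types) and then re-dispatches to the `np.ndarray` overload. A self-contained sketch of the same dispatch structure, with the device upload replaced by a stub:

```python
import functools

import numpy as np

# Sketch of the as_raw_tensor dispatch above: generic objects are routed
# through np.asarray, with float64/int64 narrowed to float32/int32 before
# the ndarray overload takes over. The real code wraps the array in
# RawTensor(put(...)) instead of returning it.
@functools.singledispatch
def convert(obj, dtype=None):
    arr = np.asarray(obj, dtype=dtype)
    if arr.dtype == np.float64:
        arr = arr.astype(np.float32)
    if arr.dtype == np.int64:
        arr = arr.astype(np.int32)
    return convert(arr)


@convert.register(np.ndarray)
def _(arr, dtype=None):
    return arr  # stub for the device upload


print(convert([1.0, 2.0]).dtype)  # float32
print(convert([1, 2]).dtype)      # int32
```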
+import functools +import io +import weakref + + +class partial(functools.partial): + def __get__(self, instance, owner=None): + if instance is None: + return self + return functools.partial(self, instance) + + +def hook(f): + def decorator(impl): + return functools.update_wrapper(partial(f, impl), impl) + + return decorator + + +def on_input(impl, value): + tensor = impl(value) + trace = get_trace() + if trace: + var = trace.get_var(tensor) + event = InputEvent(var) + trace.append(event) + return tensor + + +def on_read_dtype(impl, self): + trace = get_trace() + if trace: + var = trace.get_var(self) + event = ReadDtypeEvent(var) + trace.append(event) + + return impl(self) + + +def on_read_device(impl, self): + trace = get_trace() + if trace: + var = trace.get_var(self) + event = ReadDeviceEvent(var) + trace.append(event) + + return impl(self) + + +def on_read_shape(impl, self): + trace = get_trace() + if trace: + var = trace.get_var(self) + event = ReadShapeEvent(var) + trace.append(event) + + return impl(self) + + +def on_read_value(impl, self): + trace = get_trace() + if trace: + var = trace.get_var(self) + event = ReadValueEvent(var) + trace.append(event) + + return impl(self) + + +def on_builtin_op(impl, op, *args): + outputs = impl(op, *args) + + trace = get_trace() + if trace: + input_vars = tuple(map(trace.get_var, args)) + output_vars = outputs and tuple(map(trace.get_var, outputs)) + event = OpEvent(op, input_vars, output_vars) + trace.append(event) + + return outputs + + +def on_del(impl, self): + trace = get_trace() + if trace: + var = trace.get_var(self) + event = DelEvent(var) + trace.append(event) + + return impl(self) + + +class Trace(list): + def __init__(self): + self._var_id = 1 + self._t2v = weakref.WeakKeyDictionary() + self._v2t = weakref.WeakValueDictionary() + + def get_var(self, x): + v = self._t2v.get(x) + if v: + return v + v = self._var_id + self._var_id += 1 + self._t2v[x] = v + self._v2t[v] = x + return v + + def __bool__(self): + return True + + def __enter__(self): + global _current_trace + if hasattr(self, "_prev_trace"): + raise RuntimeError + self._prev_trace = _current_trace + _current_trace = self + return self + + def __exit__(self, *_): + global _current_trace + if _current_trace is not self: + raise RuntimeError + _current_trace = self._prev_trace + del self._prev_trace + + +class Event: + pass + + +class InputEvent(Event): + def __init__(self, var): + self.var = var + + +class ReadEvent(Event): + def __init__(self, var): + self.var = var + + +class ReadDtypeEvent(ReadEvent): + pass + + +class ReadDeviceEvent(ReadEvent): + pass + + +class ReadShapeEvent(ReadEvent): + pass + + +class ReadValueEvent(ReadEvent): + pass + + +class OpEvent(Event): + def __init__(self, op, inputs, outputs): + self.op = op + self.inputs = inputs + self.outputs = outputs + + +class DelEvent(Event): + def __init__(self, var): + self.var = var + + +_current_trace = None + + +def get_trace() -> Trace: + global _current_trace + return _current_trace + + +def format_trace(trace): + buf = io.StringIO() + active_vars = set() + + def write(fmt, *args, **kwargs): + print(fmt.format(*args, **kwargs), file=buf) + + def init_vars(*args): + for i in args: + if i in active_vars: + continue + active_vars.add(i) + write("_{} = input()", i) + + for event in trace: + if isinstance(event, InputEvent): + init_vars(event.var) + elif isinstance(event, ReadDtypeEvent): + init_vars(event.var) + write("output(_{}.dtype)", event.var) + elif isinstance(event, ReadDeviceEvent): + init_vars(event.var) + 
write("output(_{}.device)", event.var) + elif isinstance(event, ReadShapeEvent): + init_vars(event.var) + write("output(_{}.shape)", event.var) + elif isinstance(event, ReadValueEvent): + init_vars(event.var) + write("output(_{}.dtype)", event.var) + elif isinstance(event, ReadValueEvent): + init_vars(event.var) + write("output(_{}.value)", event.var) + elif isinstance(event, OpEvent): + init_vars(*event.inputs) + active_vars.update(event.outputs) + ovars = ", ".join(map("_{}".format, event.outputs)) + ivars = ", ".join(map("_{}".format, event.inputs)) + if ovars: + write("{} = {}({})", ovars, repr(event.op), ivars) + else: + write("{}({})", repr(event.op), ivars) + elif isinstance(event, DelEvent): + init_vars(event.var) + write("del _{}", event.var) + else: + raise TypeError(type(event)) + + return buf.getvalue() + + +def compile_trace(trace): + trace = list(trace) + + +def static_function(f): + trace = None + + @functools.wraps(f) + def wrapper(*args, **kwargs): + nonlocal trace + if trace is None: + with Trace() as trace: + return f(*args, **kwargs) + return f(*args, **kwargs) + + return wrapper diff --git a/imperative/python/megengine/core/tensor/raw_tensor/trace_exec.py b/imperative/python/megengine/core/tensor/raw_tensor/trace_exec.py new file mode 100644 index 0000000000000000000000000000000000000000..d16a6ef0642d3226312618ef0ccf00ed2f3b33e4 --- /dev/null +++ b/imperative/python/megengine/core/tensor/raw_tensor/trace_exec.py @@ -0,0 +1,263 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import functools +import weakref + +# Concepts +# +# * Internal tensor +# Tensor produced by the static sequence +# +# * External tensor +# Tensor not produced, but used as input, by the static sequence +# +# * Irrelevant tensor +# Tensor not present in input/output of any op +# +# * Escape +# An internal tensor is said to escape if it is still alive +# at the end of the sequence + +# JIT-ed execution +# +# 1. read attr (dtype, device, shape) +# a. internal tensor +# read out as soon as tensor is produced +# b. external or irrelevant tensor +# fallback +# +# 2. apply op +# bind external tensors in input +# +# 3. 
del + + +class Action: + pass + + +class ReadAttrAction(Action): + def __init__(self, var, name, getter): + self.var = var + self.name = name + self.getter = getter + + +class ReadValueAction(Action): + def __init__(self, var, getter): + self.var = var + self.getter = getter + + +class GetTensorAction(Action): + def __init__(self, var, getter): + self.var = var + self.getter = getter + + +class OpAction(Action): + def __init__(self, op, inputs, outputs, input_receivers): + self.op = op + self.inputs = inputs + self.outputs = outputs + self.input_receivers = input_receivers + + +class TensorAttr: + def __init__(self): + self.shape = None + self.dtype = None + self.device = None + + +class Bailout(Exception): + pass + + +class Fallback(Exception): + pass + + +def handle_bailout_fallback_finalize(f): + @functools.wraps(f) + def wrapper(self, impl, *args, **kwargs): + try: + return f(*args, **kwargs) + except Bailout: + self.bailout() + except Fallback: + pass + finally: + if self.pc == len(self): + self.finalize() + return impl(*args, **kwargs) + + return wrapper + + +class ExecTrajectory(list): + def __init__(self): + super().__init__() + self.reset() + + def __bool__(self): + return True + + def __enter__(self): + global _current_trajectory + if hasattr(self, "_prev_trajectory"): + raise RuntimeError + self._prev_trajectory = _current_trajectory + _current_trajectory = self + self._exited = False + return self + + def __exit__(self, *exc_info): + # cleanup should be done at completion, + # which is before exiting context manager + assert self._exited == (exc_info == (None, None, None)) + if not self._exited: + assert self.pc < len(self) + self.bailout() + + def _exit(self): + # clean up self and global varaible + assert not self._exited + self.reset() + + global _current_trajectory + if _current_trajectory is not self: + raise RuntimeError + _current_trajectory = self._prev_trajectory + del self._prev_trajectory + + def reset(self): + self._exited = True + self.pc = 0 + self.attr_cache = weakref.WeakKeyDictionary() + + ### Internal and External Tensor ### + # internal tensors are those produced by us + # external tensors are those received from outside + # during JIT-ed execution, internal tensors are just placeholders. + # var_to_tensor is the binding table for all tensors + self.var_to_tensor = {} # var -> weakref[tensor] + # tensor_to_var is the reverse binding table for internal tensors + # note that external tensors could map to >1 vars. + self.tensor_to_var = weakref.WeakKeyDictionary() + # internal tensor will be materialized if its .data is accessed from outside + # after being meterialized, an intern tensor is much like an external tensor + + def finalize(self): + assert self.pc == len(self) + self._exit() + + def bailout(self): + self._exit() + raise NotImplementedError + + def next_action(self): + assert not self._exited + assert self.pc < len(self) + return self[self.pc] + + @handle_bailout_fallback_finalize + def read_attr(self, tensor, name): + attrs = self.attr_cache.setdefault(tensor, TensorAttr()) + value = getattr(attrs, name, None) + if value is None: + action = self.next_action() + if not isinstance(action, ReadAttrAction): + raise Bailout + if name != action.name: + raise Bailout + value = action.getter() + setattr(attrs, name, value) + return value + + @handle_bailout_fallback_finalize + def read_value(self, impl, tensor): + # possibilities: + # 1. internal tensor + # 2. external tensor + # 3. 
irrelevant tensor (not an input / output of any op)
+        if tensor not in self.tensor_to_var:
+            raise Fallback
+        assert tensor._data is None
+        action = self.next_action()
+        if not isinstance(action, ReadValueAction):
+            raise Bailout
+        return action.getter()
+
+    @handle_bailout_fallback_finalize
+    def apply_op(self, impl, op, *args):
+        from . import RawTensor
+
+        action = self.next_action()
+        if not isinstance(action, OpAction):
+            raise Bailout
+        if len(args) != len(action.inputs):
+            raise Bailout
+        assert len(action.inputs) == len(action.input_receivers)
+
+        for v, t, r in zip(action.inputs, args, action.input_receivers):
+            if v in self.var_to_tensor:
+                assert r is None
+                if t is not self.var_to_tensor[v]():
+                    raise Bailout
+            else:
+                # NOTE: not checking for aliasing (>=2 vars map to 1 tensor)
+                # the static execution backend must handle this
+                self.var_to_tensor[v] = weakref.ref(t)
+                r(t)
+
+        outputs = []
+        for v in action.outputs:
+            assert v not in self.var_to_tensor
+            t = RawTensor()
+            t._data_getter = functools.partial(self.get_data, v)
+            outputs.append(t)
+            self.var_to_tensor[v] = weakref.ref(t)
+
+        return tuple(outputs)
+
+    def get_data(self, var):
+        tensor = self.var_to_tensor[var]()
+        assert tensor is not None
+        assert tensor._data is None
+        assert tensor in self.tensor_to_var
+        action = self.next_action()
+        if not isinstance(action, GetTensorAction):
+            self.bailout()
+        elif action.var != var:
+            self.bailout()
+        else:
+            tensor._data = action.getter()
+            del tensor._data_getter
+            del self.tensor_to_var[tensor]
+        assert "_data_getter" not in tensor.__dict__
+        return tensor._data
+
+
+_current_trajectory = None
+
+
+def get_trajectory():
+    return _current_trajectory
+
+
+def compile_trace(trace):
+    from .jit import ReadDtypeEvent, ReadDeviceEvent, ReadShapeEvent, OpEvent, DelEvent
+
+    traj = ExecTrajectory()
+    active_vars = set()
+
+    for event in trace:
+        if isinstance(event, ReadDtypeEvent):
+            traj.append(ReadAttrAction())
diff --git a/imperative/python/megengine/core/tensor/tensor.py b/imperative/python/megengine/core/tensor/tensor.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f2ff9d78121e9f4bb9746ac97494e9949e84c29
--- /dev/null
+++ b/imperative/python/megengine/core/tensor/tensor.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
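The essence of `ExecTrajectory` above is replay with a program counter: each thing the program actually does is matched against the next recorded `Action`, and any mismatch raises `Bailout` to abandon the fast path. A minimal standalone sketch of that control flow, with string tokens standing in for the real `Action` objects:

```python
# Sketch of the replay-and-bailout idea behind ExecTrajectory: recorded
# actions are matched one by one against what the program actually does;
# any divergence abandons the fast path.
class Bailout(Exception):
    pass


def replay(recorded, observed):
    pc = 0
    for op in observed:
        if pc >= len(recorded) or recorded[pc] != op:
            raise Bailout("diverged at step {}".format(pc))
        pc += 1
    return pc == len(recorded)  # True iff the trajectory completed


print(replay(["add", "mul"], ["add", "mul"]))  # True
try:
    replay(["add", "mul"], ["add", "sub"])
except Bailout as e:
    print(e)  # diverged at step 1
```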
+import contextlib +import copy + +from .core import Dispatcher, OpBase, TensorBase, apply + + +class Tensor(TensorBase): + def __init__(self, data: TensorBase): + self._data = data + # _extra_data is set up in Grad.wrt + self._extra_data = {} + self._user_data = {} + + def __getattr__(self, name): + if name in self._user_data: + return self._user_data[name] + raise AttributeError(name) + + def reset(self, other): + assert isinstance(other, __class__) + self.__dict__.clear() + self._data = other.data + self._extra_data = other._extra_data.copy() + self._user_data = other._user_data.copy() + + def copy(self): + other = object.__new__(type(self)) + other.reset(self) + return other + + # tensor interface + + @property + def shape(self): + return self._data.shape + + @property + def dtype(self): + return self._data.dtype + + @property + def device(self): + return self._data.device + + def numpy(self): + return self._data.numpy() + + +class ApplyContext: + def __init__(self): + self.inputs = None + self.outputs = None + self.key = None + + +_context = None + + +@contextlib.contextmanager +def push_context(): + global _context + backup = _context + try: + _context = ApplyContext() + yield _context + finally: + _context = backup + + +def get_context(): + return _context + + +@apply.add +def tensor_apply(op: OpBase, *args: Tensor): + data = tuple(i._data if isinstance(i, Tensor) else i for i in args) + # type(Tensor._data) is RawTensor + # dispached to apply.add@RawTensor.py if passed Tensor args + outputs = apply(op, *data) + ret = tuple(map(Tensor, outputs)) + + with push_context() as ctx: + ctx.inputs = args + ctx.outputs = ret + for k in set().union(*(i._extra_data for i in args if isinstance(i, Tensor))): + ctx.key = k + data = tuple( + i._extra_data.get(k) if isinstance(i, Tensor) else i for i in args + ) + # data are instances of Tracer + # dispatched to apply.add@grad.py + outputs = apply(op, *data) + if outputs is not None: + assert len(outputs) == len(ret) + for t, i in zip(ret, outputs): + t._extra_data[k] = i + + return ret diff --git a/imperative/python/megengine/core/tensor/tensor_wrapper.py b/imperative/python/megengine/core/tensor/tensor_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..011e7d1183db67dd129a82051e42a160082295ae --- /dev/null +++ b/imperative/python/megengine/core/tensor/tensor_wrapper.py @@ -0,0 +1,367 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import abc +import collections + +import numpy as np + +from ..ops import builtin +from ..ops.special import Const +from . 
import utils +from .core import OpBase, TensorBase, TensorWrapperBase, apply +from .indexing import getitem as _getitem +from .indexing import setitem as _setitem +from .raw_tensor import RawTensor, as_raw_tensor +from .tensor import Tensor + + +def _elwise(*args, mode): + op = builtin.Elemwise(mode=mode) + args = utils.convert_inputs(*args) + (result,) = apply(op, *args) + return result + + +def _matmul(inp1, inp2): + op = builtin.MatrixMul( + transposeA=False, transposeB=False, compute_mode="DEFAULT", format="DEFAULT" + ) + inp1, inp2 = utils.convert_inputs(inp1, inp2) + (result,) = apply(op, inp1, inp2) + return result + + +def _transpose(data, axes): + op = builtin.Dimshuffle(axes) + (data,) = utils.convert_inputs(data) + (result,) = apply(op, data) + return result + + +def _broadcast(inp, shape): + shape = utils.astensor1d(shape, inp, dtype="int32", device=inp.device) + (result,) = apply(builtin.Broadcast(), inp, shape) + return result + + +def _reshape(x, shape): + if isinstance(shape, (TensorBase, TensorWrapperBase)): + shape = shape.numpy() + shape = tuple(map(int, shape)) + unspec_axis = None + for i, s in enumerate(shape): + if s < 0: + if s != -1: + raise ValueError("expect shape[{}] >= -1, got {}".format(i, s)) + if unspec_axis is not None: + raise ValueError("multiple -1 in shape: {} & {}".format(unspec_axis, i)) + unspec_axis = i + + # TODO: device should be None (cpu) + (shape,) = Const(shape, dtype=np.int32, device=x.device)(x) + if unspec_axis is None: + op = builtin.Reshape() + else: + op = builtin.Reshape(unspec_axis=unspec_axis) + (x,) = apply(op, x, shape) + return x + + +def _unary_elwise(mode): + def f(self): + return _elwise(self, mode=mode) + + return f + + +def _binary_elwise(mode, rev=False): + if not rev: + + def f(self, value): + return _elwise(self, value, mode=mode) + + else: + + def f(self, value): + return _elwise(value, self, mode=mode) + + return f + + +def _logical_unary_elwise(mode, rev=False): + def f(self): + if self.dtype != np.bool_: + raise TypeError("{} requires a bool tensor".format(mode)) + return _elwise(self, mode=mode) + + return f + + +def _logical_binary_elwise(mode, rev=False): + if not rev: + + def f(self, value): + if self.dtype != np.bool_ or value.dtype != np.bool_: + raise TypeError("{} requires 2 bool tensors".format(mode)) + return _elwise(self, value, mode=mode) + + else: + + def f(self, value): + if self.dtype != np.bool_ or value.dtype != np.bool_: + raise TypeError("{} requires 2 bool tensors".format(mode)) + return _elwise(value, self, mode=mode) + + return f + + +def _reduce(mode): + def f(self, axis=None): + inp = self + if axis is None: + inp = self.flatten() + axis = 0 + op = builtin.Reduce(mode=mode, axis=axis) + (result,) = utils.convert_inputs(inp) + (result,) = apply(op, result) + return result + + return f + + +def _inplace(f): + def g(self, value): + result = f(self, value) + if result is NotImplemented: + raise NotImplementedError + self._reset(result) + return self + + return g + + +def _todo(*_): + raise NotImplementedError + + +class ArrayMethodMixin(abc.ABC): + + __array_priority__ = 233333 + + @abc.abstractmethod + def _reset(self, other): + pass + + @abc.abstractproperty + def dtype(self) -> np.dtype: + pass + + @abc.abstractproperty + def shape(self) -> tuple: + pass + + @abc.abstractmethod + def numpy(self) -> np.ndarray: + pass + + __hash__ = None # due to __eq__ diviates from python convention + + __lt__ = lambda self, value: _elwise(self, value, mode="LT").astype("bool") + __le__ = lambda self, value: 
_elwise(self, value, mode="LEQ").astype("bool") + __gt__ = lambda self, value: _elwise(value, self, mode="LT").astype("bool") + __ge__ = lambda self, value: _elwise(value, self, mode="LEQ").astype("bool") + __eq__ = lambda self, value: _elwise(self, value, mode="EQ").astype("bool") + __ne__ = lambda self, value: _elwise( + _elwise(self, value, mode="EQ").astype("bool"), mode="NOT" + ) + + __neg__ = _unary_elwise("NEGATE") + __pos__ = lambda self: self + __abs__ = _unary_elwise("ABS") + __invert__ = _logical_unary_elwise("NOT") + __round__ = _unary_elwise("ROUND") + __trunc__ = _todo + __floor__ = _unary_elwise("FLOOR") + __ceil__ = _unary_elwise("CEIL") + + __add__ = _binary_elwise("ADD") + __sub__ = _binary_elwise("SUB") + __mul__ = _binary_elwise("MUL") + __matmul__ = lambda self, other: _matmul(self, other) + __truediv__ = _binary_elwise("TRUE_DIV") + __floordiv__ = _binary_elwise("FLOOR_DIV") + __mod__ = _binary_elwise("MOD") + # __divmode__ + __pow__ = _binary_elwise("POW") + __lshift__ = _binary_elwise("SHL") + __rshift__ = _binary_elwise("SHR") + __and__ = _logical_binary_elwise("AND") + __or__ = _logical_binary_elwise("OR") + __xor__ = _logical_binary_elwise("XOR") + + __radd__ = _binary_elwise("ADD", rev=1) + __rsub__ = _binary_elwise("SUB", rev=1) + __rmul__ = _binary_elwise("MUL", rev=1) + __rmatmul__ = lambda self, other: _matmul(other, self) + __rtruediv__ = _binary_elwise("TRUE_DIV", rev=1) + __rfloordiv__ = _binary_elwise("FLOOR_DIV", rev=1) + __rmod__ = _binary_elwise("MOD", rev=1) + # __rdivmode__ + __rpow__ = _binary_elwise("POW", rev=1) + __rlshift__ = _binary_elwise("SHL", rev=1) + __rrshift__ = _binary_elwise("SHR", rev=1) + __rand__ = _logical_binary_elwise("AND", rev=1) + __ror__ = _logical_binary_elwise("OR", rev=1) + __rxor__ = _logical_binary_elwise("XOR", rev=1) + + __iadd__ = _inplace(__add__) + __isub__ = _inplace(__sub__) + __imul__ = _inplace(__mul__) + __imatmul__ = _inplace(__matmul__) + __itruediv__ = _inplace(__truediv__) + __ifloordiv__ = _inplace(__floordiv__) + __imod__ = _inplace(__mod__) + __ipow__ = _inplace(__pow__) + __ilshift__ = _inplace(__lshift__) + __irshift__ = _inplace(__rshift__) + __iand__ = _inplace(__and__) + __ior__ = _inplace(__or__) + __ixor__ = _inplace(__xor__) + + __index__ = lambda self: self.item().__index__() + __bool__ = lambda self: bool(self.item()) + __int__ = lambda self: int(self.item()) + __float__ = lambda self: float(self.item()) + __complex__ = lambda self: complex(self.item()) + + def __len__(self): + shape = self.shape + if shape: + return int(shape[0]) + raise TypeError("ndim is 0") + + def __iter__(self): + for i in range(len(self)): + yield self[i] + + def __getitem__(self, index): + return _getitem(self, index) + + def __setitem__(self, index, value): + if index is not Ellipsis: + value = _setitem(self, index, value) + self._reset(value) + + __contains__ = _todo + + @property + def ndim(self): + return len(self.shape) + + @property + def size(self): + return np.prod(self.shape).item() + + @property + def T(self): + return self.transpose() + + def item(self, *args): + if not args: + assert self.size == 1 + return self.numpy().item() + return self[args].item() + + def tolist(self): + return self.numpy().tolist() + + def astype(self, dtype): + return utils.astype(self, dtype) + + def reshape(self, *args): + if len(args) == 1: + if isinstance(args[0], collections.Sequence): + args = args[0] + return _reshape(self, args) + + def broadcast(self, *args): + if len(args) == 1: + if isinstance(args[0], 
collections.Sequence): + args = args[0] + return _broadcast(self, args) + + def transpose(self, *args): + if not args: + args = reversed(range(self.ndim)) + elif len(args) == 1: + if isinstance(args[0], collections.Sequence): + args = args[0] + return _transpose(self, args) + + def flatten(self): + return self.reshape(-1) + + sum = _reduce("SUM") + prod = _reduce("PRODUCT") + min = _reduce("MIN") + max = _reduce("MAX") + mean = _reduce("MEAN") + + +class GenericTensorWrapper(ArrayMethodMixin, TensorWrapperBase): + def __init__(self, data): + self.__wrapped__ = data + + def _reset(self, other): + if not isinstance(other, __class__): + raise TypeError(type(other)) + self.__wrapped__ = other.__wrapped__ + return self + + @property + def dtype(self): + return self.__wrapped__.dtype + + @property + def shape(self): + return self.__wrapped__.shape + + @property + def device(self): + return self.__wrapped__.device + + def numpy(self): + return self.__wrapped__.numpy() + + +class TensorWrapper(GenericTensorWrapper): + def __init__(self, data, dtype=None, device=None): + if isinstance(data, TensorWrapperBase): + data = data.__wrapped__ + elif not isinstance(data, TensorBase): + assert data is not None, "Cannot init a tensor with data as None" + data = Tensor(as_raw_tensor(data, dtype=dtype, device=device)) + super().__init__(data) + + def _reset(self, other): + if isinstance(other, TensorWrapperBase): + self.__wrapped__ = other.__wrapped__ + elif isinstance(other, TensorBase): + self.__wrapped__ = other + else: + self._reset(type(self)(other, dtype=self.dtype, device=self.device)) + + def __repr__(self): + piece = "Tensor(" + with np.printoptions(precision=4, suppress=True): + piece += "{}".format(str(self.numpy())) + if self.dtype != np.float32: + piece += ", dtype={}".format(np.dtype(self.dtype).name) + piece += ", device={}".format(self.device) + ")" + return piece diff --git a/imperative/python/megengine/core/tensor/utils.py b/imperative/python/megengine/core/tensor/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a059ff8dfd116621ae5c16835357a182e01f0477 --- /dev/null +++ b/imperative/python/megengine/core/tensor/utils.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
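+#
+# Note on dtype_promotion below: promotion follows numpy's result_type, except
+# that float64/int64 results are rejected for tensor inputs, and a float64
+# result caused only by python scalars is demoted to float32. A hypothetical
+# sketch of the intended behaviour:
+#
+#     dtype_promotion([float32_tensor, 1.0])  # -> float32, not float64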
+import collections +from typing import Iterable, Union + +import numpy as np + +from ..ops import builtin +from ..ops.special import Const +from ..tensor.core import OpBase, TensorBase, TensorWrapperBase, apply + + +def dtype_promotion(raw_inputs): + def add_dtype(i): + if type(i) == int: + return np.array(i, dtype=np.int32) + if type(i) == float: + return np.array(i, dtype=np.float32) + if type(i) == bool: + return np.array(i, dtype=np.bool_) + return None + + scalar_inputs = [ + add_dtype(i) for i in raw_inputs if not hasattr(i, "dtype") and add_dtype(i) + ] + inputs = [i for i in raw_inputs if hasattr(i, "dtype")] + assert len(scalar_inputs + inputs) > 0 + dtype = np.result_type(*inputs) + dtype_all = np.result_type(*(inputs + scalar_inputs)) + assert ( + dtype != np.float64 and dtype != np.int64 + ), "unsupport dtype {} by dtype_promotion, please use explict type convert".format( + dtype + ) + if dtype_all == np.bool_: + for i in raw_inputs: + if not hasattr(i, "dtype") or i.dtype != np.bool_: + raise TypeError( + "bool dtype can not be operated with an element without bool dtype" + ) + if dtype_all == np.float64: + dtype_all = np.float32 + return dtype_all + + +def get_device(inputs): + device = None + for i in inputs: + if isinstance(i, (TensorWrapperBase, TensorBase)): + if device is None: + device = i.device + elif device != i.device: + raise ValueError("ambiguous device: {} vs {}".format(device, i.device)) + assert device is not None + return device + + +def concatenate(inputs, axis=0, *, device=None): + dtype = dtype_promotion(inputs) + device = get_device(inputs) + + def convert(x): + return convert_single_value(x, inputs, dtype=dtype) + + inputs = tuple(map(convert, inputs)) + (result,) = apply(builtin.Concat(axis=axis, comp_node=device.to_c()), *inputs) + return result + + +def astype(x, dtype): + dtype = np.dtype(dtype) + if x.dtype != dtype: + (x,) = apply(builtin.TypeCvt(param=dtype), x) + return x + + +def convert_single_value(v, inputs, *, dtype=None, device=None): + tensors = [i for i in inputs if isinstance(i, (TensorBase, TensorWrapperBase))] + assert len(tensors) > 0 + if isinstance(v, (TensorWrapperBase, TensorBase)): + v = astype(v, dtype) + else: + (v,) = Const(v, dtype=dtype, device=device)(*tensors) + return v + + +def convert_inputs(*args: TensorBase): + dtype = dtype_promotion(args) + device = get_device(args) + + def convert(value): + if value is None: + return value + return convert_single_value(value, args, dtype=dtype, device=device) + + return tuple(map(convert, args)) + + +def result_type(*args): + dtypes = [] + for i in args: + if isinstance(i, (TensorWrapperBase, TensorBase)): + dtypes.append(i.dtype) + continue + try: + dtypes.append(np.dtype(i)) + except TypeError: + pass + return np.result_type(*dtypes) + + +def isscalar(x): + try: + return x.ndim == 0 + except: + pass + return np.isscalar(x) + + +def astensor1d(x, *reference, dtype=None, device=None): + """ + Convert something to 1D tensor. 
The following types are supported:
+    * sequence of scalar literal / tensor
+    * numpy array
+    * tensor (returned as is, regardless of dtype and device)
+    """
+    try:
+        ndim = x.ndim
+    except AttributeError:
+        pass
+    else:
+        if ndim != 1:
+            raise ValueError("ndim != 1: %d" % ndim)
+        if not isinstance(x, (TensorBase, TensorWrapperBase)):
+            (x,) = Const(x, dtype=dtype, device=device)(*reference)
+        return x
+
+    if not isinstance(x, collections.Sequence):
+        raise TypeError
+
+    if any(isinstance(i, (TensorBase, TensorWrapperBase)) for i in x):
+        x = concatenate(x, device=device)
+        if dtype is not None:
+            x = astype(x, dtype)
+        return x
+
+    (x,) = Const(x, dtype=dtype, device=device)(*reference)
+    return x
diff --git a/imperative/python/megengine/data/__init__.py b/imperative/python/megengine/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b1e0d556e66b1c71124389df749537b4cc7452c
--- /dev/null
+++ b/imperative/python/megengine/data/__init__.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from .collator import Collator
+from .dataloader import DataLoader
+from .sampler import (
+    Infinite,
+    RandomSampler,
+    ReplacementSampler,
+    Sampler,
+    SequentialSampler,
+)
diff --git a/imperative/python/megengine/data/_queue.py b/imperative/python/megengine/data/_queue.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9e328c65c56e4f4ba736b510176677b6c735c32
--- /dev/null
+++ b/imperative/python/megengine/data/_queue.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import binascii
+import os
+import queue
+import subprocess
+from multiprocessing import Queue
+
+import pyarrow
+import pyarrow.plasma as plasma
+
+MGE_PLASMA_MEMORY = int(os.environ.get("MGE_PLASMA_MEMORY", 4000000000))  # 4GB
+
+# Each process only needs to start one plasma store, so we set it as a global variable.
+# TODO: how to share between different processes?
+MGE_PLASMA_STORE_MANAGER = None
+
+
+def _clear_plasma_store():
+    # `_PlasmaStoreManager.__del__` will not be called automatically in a subprocess,
+    # so this function should be called explicitly
+    global MGE_PLASMA_STORE_MANAGER
+    if MGE_PLASMA_STORE_MANAGER is not None:
+        del MGE_PLASMA_STORE_MANAGER
+        MGE_PLASMA_STORE_MANAGER = None
+
+
+class _PlasmaStoreManager:
+    __initialized = False
+
+    def __init__(self):
+        self.socket_name = "/tmp/mge_plasma_{}".format(
+            binascii.hexlify(os.urandom(8)).decode()
+        )
+        debug_flag = bool(os.environ.get("MGE_DATALOADER_PLASMA_DEBUG", 0))
+        # NOTE: this is a hack. Using `plasma_store` directly may make it difficult
+        # for the subprocess to handle exceptions raised in `plasma-store-server`,
+        # because `plasma_store` is just a wrapper of `plasma-store-server`, which
+        # uses `os.execv` to call the executable `plasma-store-server`.
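+        # The Popen below therefore launches the server binary directly, roughly:
+        #     plasma-store-server -s /tmp/mge_plasma_<hex> -m <MGE_PLASMA_MEMORY>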
+ cmd_path = os.path.join(pyarrow.__path__[0], "plasma-store-server") + self.plasma_store = subprocess.Popen( + [cmd_path, "-s", self.socket_name, "-m", str(MGE_PLASMA_MEMORY),], + stdout=None if debug_flag else subprocess.DEVNULL, + stderr=None if debug_flag else subprocess.DEVNULL, + ) + self.__initialized = True + + def __del__(self): + if self.__initialized and self.plasma_store.returncode is None: + self.plasma_store.kill() + + +class PlasmaShmQueue: + def __init__(self, maxsize: int = 0): + r"""Use pyarrow in-memory plasma store to implement shared memory queue. + + Compared to native `multiprocess.Queue`, `PlasmaShmQueue` avoid pickle/unpickle + and communication overhead, leading to better performance in multi-process + application. + + :type maxsize: int + :param maxsize: maximum size of the queue, `None` means no limit. (default: ``None``) + """ + + # Lazy start the plasma store manager + global MGE_PLASMA_STORE_MANAGER + if MGE_PLASMA_STORE_MANAGER is None: + try: + MGE_PLASMA_STORE_MANAGER = _PlasmaStoreManager() + except Exception as e: + err_info = ( + "Please make sure pyarrow installed correctly!\n" + "You can try reinstall pyarrow and see if you can run " + "`plasma_store -s /tmp/mge_plasma_xxx -m 1000` normally." + ) + raise RuntimeError( + "Exception happened in starting plasma_store: {}\n" + "Tips: {}".format(str(e), err_info) + ) + + self.socket_name = MGE_PLASMA_STORE_MANAGER.socket_name + + # TODO: how to catch the exception happened in `plasma.connect`? + self.client = None + + # Used to store the header for the data.(ObjectIDs) + self.queue = Queue(maxsize) # type: Queue + + def put(self, data, block=True, timeout=None): + if self.client is None: + self.client = plasma.connect(self.socket_name) + try: + object_id = self.client.put(data) + except plasma.PlasmaStoreFull: + raise RuntimeError("plasma store out of memory!") + try: + self.queue.put(object_id, block, timeout) + except queue.Full: + self.client.delete([object_id]) + raise queue.Full + + def get(self, block=True, timeout=None): + if self.client is None: + self.client = plasma.connect(self.socket_name) + object_id = self.queue.get(block, timeout) + if not self.client.contains(object_id): + raise RuntimeError( + "ObjectID: {} not found in plasma store".format(object_id) + ) + data = self.client.get(object_id) + self.client.delete([object_id]) + return data + + def qsize(self): + return self.queue.qsize() + + def empty(self): + return self.queue.empty() + + def join(self): + self.queue.join() + + def disconnect_client(self): + if self.client is not None: + self.client.disconnect() + + def close(self): + self.queue.close() + self.disconnect_client() + _clear_plasma_store() + + def cancel_join_thread(self): + self.queue.cancel_join_thread() diff --git a/imperative/python/megengine/data/collator.py b/imperative/python/megengine/data/collator.py new file mode 100644 index 0000000000000000000000000000000000000000..952fc39881eb092c5caa786e83ca443f0a1d818b --- /dev/null +++ b/imperative/python/megengine/data/collator.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016- Facebook, Inc (Adam Paszke) +# Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +# Copyright (c) 2011-2013 NYU (Clement Farabet) +# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, 
Jason Weston)
+# Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
+# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+# ---------------------------------------------------------------------
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#
+# This file has been modified by Megvii ("Megvii Modifications").
+# All Megvii Modifications are Copyright (C) 2014-2020 Megvii Inc. All rights reserved.
+# ----------------------------------------------------------------------
+import collections.abc
+import re
+
+import numpy as np
+
+np_str_obj_array_pattern = re.compile(r"[aO]")
+default_collate_err_msg_format = (
+    "default_collator: inputs must contain numpy arrays, numbers, "
+    "Unicode strings, bytes, dicts or lists; found {}"
+)
+
+
+class Collator:
+    r"""
+    Used for merging a list of samples into a mini-batch of Tensor(s),
+    when loading batched data from a dataset.
+    Modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py
+    """
+
+    def apply(self, inputs):
+        """
+        input : sequence_N(tuple(CHW, C, CK))
+        output : tuple(NCHW, NC, NCK)
+        """
+        elem = inputs[0]
+        elem_type = type(elem)
+        if (
+            elem_type.__module__ == "numpy"
+            and elem_type.__name__ != "str_"
+            and elem_type.__name__ != "string_"
+        ):
+            elem = inputs[0]
+            if elem_type.__name__ == "ndarray":
+                # array of string classes and object
+                if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
+                    raise TypeError(default_collate_err_msg_format.format(elem.dtype))
+
+                return np.ascontiguousarray(np.stack(inputs))
+            elif elem.shape == ():  # scalars
+                return np.array(inputs)
+        elif isinstance(elem, float):
+            return np.array(inputs, dtype=np.float64)
+        elif isinstance(elem, int):
+            return np.array(inputs)
+        elif isinstance(elem, (str, bytes)):
+            return inputs
+        elif isinstance(elem, collections.abc.Mapping):
+            return {key: self.apply([d[key] for d in inputs]) for key in elem}
+        elif isinstance(elem, tuple) and hasattr(elem, "_fields"):  # namedtuple
+            return elem_type(*(self.apply(samples) for samples in zip(*inputs)))
+        elif isinstance(elem, collections.abc.Sequence):
+            transposed = zip(*inputs)
+            return [self.apply(samples) for samples in transposed]
+
+        raise TypeError(default_collate_err_msg_format.format(elem_type))
diff --git a/imperative/python/megengine/data/dataloader.py b/imperative/python/megengine/data/dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fd3482df727ddc84970bce239ebfd60990db6e1
--- /dev/null
+++ b/imperative/python/megengine/data/dataloader.py
@@ -0,0 +1,500 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
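+#
+# A minimal usage sketch (the dataset and batch size are illustrative):
+#
+#     dataset = ArrayDataset(images, labels)
+#     dataloader = DataLoader(
+#         dataset, sampler=SequentialSampler(dataset, batch_size=64), num_workers=4
+#     )
+#     for batch_images, batch_labels in dataloader:
+#         ...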
+import collections
+import math
+import multiprocessing
+import queue
+import random
+import time
+
+import numpy as np
+
+from ..logger import get_logger
+from ..random.rng import _random_seed_generator
+from .collator import Collator
+from .dataset import Dataset
+from .sampler import Sampler, SequentialSampler
+from .transform import PseudoTransform, Transform
+
+logger = get_logger(__name__)
+
+
+MP_QUEUE_GET_TIMEOUT = 5
+
+
+class DataLoader:
+    __initialized = False
+
+    def __init__(
+        self,
+        dataset: Dataset,
+        sampler: Sampler = None,
+        transform: Transform = None,
+        collator: Collator = None,
+        num_workers: int = 0,
+        timeout: int = 0,
+        divide: bool = False,
+    ):
+        r"""Provides a convenient way to iterate on a given dataset.
+
+        `DataLoader` combines a dataset with a sampler, a transform and a collator,
+        making it flexible to continually fetch minibatches from a dataset.
+
+        :type dataset: Dataset
+        :param dataset: dataset from which to load the minibatch.
+        :type sampler: Sampler
+        :param sampler: defines the strategy to sample data from the dataset.
+        :type transform: Transform
+        :param transform: defines the transforming strategy for a sampled batch.
+            (default: ``None``)
+        :type collator: Collator
+        :param collator: defines the merging strategy for a transformed batch.
+            (default: ``None``)
+        :type num_workers: int
+        :param num_workers: the number of sub-processes used to load, transform and
+            collate the batch. ``0`` means loading in a single process. (default: ``0``)
+        :type timeout: int
+        :param timeout: if positive, the timeout value (in seconds) for collecting a
+            batch from workers. (default: ``0``)
+        :type divide: bool
+        :param divide: defines the paralleling strategy in multi-processing mode.
+            ``True`` means one batch is divided into :attr:`num_workers` pieces, and
+            the workers process these pieces in parallel. ``False`` means
+            different sub-processes process different batches. (default: ``False``)
+        """
+
+        if num_workers < 0:
+            raise ValueError("num_workers should not be negative")
+
+        if timeout < 0:
+            raise ValueError("timeout should not be negative")
+
+        if divide and num_workers <= 1:
+            raise ValueError("divide should not be set to True when num_workers <= 1")
+
+        self.dataset = dataset
+        self.num_workers = num_workers
+        self.timeout = timeout
+
+        self.divide = divide
+
+        if sampler is None:
+            self.sampler = SequentialSampler(dataset, batch_size=1, drop_last=False)
+        else:
+            self.sampler = sampler
+
+        if divide:
+            if self.sampler.batch_size <= self.num_workers:
+                raise ValueError(
+                    "batch size must not be smaller than num_workers in divide mode."
+                )
+            elif self.sampler.batch_size % self.num_workers:
+                logger.warning(
+                    "batch size is not divisible by num_workers, may lose performance in divide mode."
+ ) + + if transform is None: + self.transform = PseudoTransform() + else: + self.transform = transform + + if collator is None: + self.collator = Collator() + else: + self.collator = collator + + self.__initialized = True + + def __iter__(self): + if self.num_workers == 0: + return _SerialDataLoaderIter(self) + else: + return _ParallelDataLoaderIter(self) + + def __len__(self): + return len(self.sampler) + + +class _BaseDataLoaderIter: + def __init__(self, loader): + self.dataset = loader.dataset + self.sampler = loader.sampler + self.seed = _random_seed_generator().__next__() + self.transform = loader.transform + self.collator = loader.collator + self.num_workers = loader.num_workers + self.timeout = loader.timeout + self.divide = loader.divide + self.num_processed = 0 + + def _get_next_batch(self): + raise NotImplementedError + + def __len__(self): + return len(self.sampler) + + def __iter__(self): + return self + + def __next__(self): + if self.num_processed >= len(self): + raise StopIteration + minibatch = self._get_next_batch() + self.num_processed += 1 + return minibatch + + +class _SerialDataLoaderIter(_BaseDataLoaderIter): + def __init__(self, loader): + super(_SerialDataLoaderIter, self).__init__(loader) + self.indices_iter = iter(self.sampler) + + def _get_next_batch(self): + indices = next(self.indices_iter) + items = [self.dataset[idx] for idx in indices] + trans_items = self.transform.apply_batch(items) + return self.collator.apply(trans_items) + + +class _ParallelDataLoaderIter(_BaseDataLoaderIter): + __initialized = False + + def __init__(self, loader): + super(_ParallelDataLoaderIter, self).__init__(loader) + + self.task_queues = [ + multiprocessing.Queue(maxsize=2) for _ in range(self.num_workers) + ] + + self.feed_batch_idx = multiprocessing.Value("i", 0) + self.target_batch_idx = multiprocessing.Value("i", 0) + self.shutdown_flag = multiprocessing.Value("i", 0) + + self.trans_data_queues = [ + multiprocessing.Queue(maxsize=1) for _ in range(self.num_workers) + ] + + # use shared-memory queue implemented by pyarrow plasma store. + from ._queue import PlasmaShmQueue + + self.batch_queue = PlasmaShmQueue(maxsize=2) + + self.task_feeding_worker = multiprocessing.Process( + target=_task_feeding_loop, + args=( + iter(self.sampler), + self.task_queues, + self.num_workers, + self.divide, + self.shutdown_flag, + self.feed_batch_idx, + ), + daemon=True, + ) + self.task_feeding_worker.start() + + self.workers = [] + for worker_id in range(self.num_workers): + worker = multiprocessing.Process( + target=_worker_loop, + args=( + self.dataset, + self.task_queues[worker_id], + self.trans_data_queues[worker_id], + self.transform, + self.seed + worker_id + 1, + self.shutdown_flag, + ), + daemon=True, + ) + worker.start() + self.workers.append(worker) + + if self.divide: + self.data_collecting_worker = multiprocessing.Process( + target=_data_gathering_loop, + args=( + self.trans_data_queues, + self.batch_queue, + self.collator, + len(self), + self.num_workers, + self.shutdown_flag, + self.target_batch_idx, + ), + daemon=True, + ) + else: + self.data_collecting_worker = multiprocessing.Process( + target=_data_selecting_loop, + args=( + self.trans_data_queues, + self.batch_queue, + self.collator, + len(self), + self.num_workers, + self.shutdown_flag, + self.target_batch_idx, + ), + daemon=True, + ) + self.data_collecting_worker.start() + + self.__initialized = True + + def _check_workers(self): + # Check the status of each worker. 
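+        # A worker that is dead with a nonzero exitcode has crashed; raise
+        # immediately instead of blocking forever on a queue it will never fill.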
+        if not self.data_collecting_worker.is_alive():
+            exitcode = self.data_collecting_worker.exitcode
+            if exitcode != 0:
+                raise RuntimeError("data collecting worker died. {}".format(exitcode))
+
+        if not self.task_feeding_worker.is_alive():
+            exitcode = self.task_feeding_worker.exitcode
+            if exitcode != 0:
+                raise RuntimeError("task feeding worker died. {}".format(exitcode))
+
+        for worker_id, worker in enumerate(self.workers):
+            if not worker.is_alive():
+                exitcode = worker.exitcode
+                if exitcode != 0:
+                    raise RuntimeError("worker:{} died. {}".format(worker_id, exitcode))
+
+        logger.debug("all workers are alive.")
+
+    def _try_get_next_batch(self):
+        start_time = time.time()
+        while True:
+            self._check_workers()
+            try:
+                return self.batch_queue.get(timeout=1)
+            except queue.Empty:
+                logger.debug("batch queue empty!")
+                waited_time = time.time() - start_time
+                if self.timeout > 0:
+                    if waited_time > self.timeout:
+                        raise RuntimeError("get_next_batch timeout!")
+
+    def _get_next_batch(self):
+        batch_data = self._try_get_next_batch()
+        return batch_data
+
+    def _shutdown(self):
+        with self.shutdown_flag.get_lock():
+            self.shutdown_flag.value = 1
+
+        if self.task_feeding_worker.is_alive():
+            self.task_feeding_worker.terminate()
+            self.task_feeding_worker.join()
+
+        if self.data_collecting_worker.is_alive():
+            self.data_collecting_worker.terminate()
+            self.data_collecting_worker.join()
+
+        for worker in self.workers:
+            if worker.is_alive():
+                worker.terminate()
+                worker.join()
+
+        for q in self.trans_data_queues:
+            q.cancel_join_thread()
+            q.close()
+
+        for q in self.task_queues:
+            q.cancel_join_thread()
+            q.close()
+
+        self.batch_queue.cancel_join_thread()
+        self.batch_queue.close()
+
+    def __del__(self):
+        if self.__initialized:
+            self._shutdown()
+
+
+def _task_feeding_loop(
+    indices_iter, task_queues, num_workers, divide, shutdown_flag, feed_batch_idx
+):
+    # Feed the indices into the task queues
+    while True:
+        if shutdown_flag.value == 1:
+            break
+        batch_idx = feed_batch_idx.value
+        try:
+            indices = next(indices_iter)
+        except StopIteration:
+            break
+        if divide:
+            # make sure all task_queues are ready for put
+            while any([q.full() for q in task_queues]):
+                if shutdown_flag.value == 1:
+                    return
+            # divide into small pieces, feed to different workers.
+            sub_num = math.ceil(len(indices) / num_workers)
+            for worker_id in range(num_workers):
+                sub_indices = indices[worker_id * sub_num : (worker_id + 1) * sub_num]
+                task_queues[worker_id].put((batch_idx, sub_indices))
+        else:
+            # distribute tasks to different workers uniformly.
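+            # e.g. with num_workers=4, batches 0,1,2,3,4,... go to workers 0,1,2,3,0,...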
+ target_id = batch_idx % num_workers + while task_queues[target_id].full(): + if shutdown_flag.value == 1: + return + task_queues[target_id].put((batch_idx, indices)) + with feed_batch_idx.get_lock(): + feed_batch_idx.value += 1 + + +def _worker_loop(dataset, task_queue, trans_data_queue, transform, seed, shutdown_flag): + # Get dataset items and do the transform + random.seed(seed) + np.random.seed(seed) + while True: + if shutdown_flag.value == 1: + break + try: + batch_idx, indices = task_queue.get(timeout=MP_QUEUE_GET_TIMEOUT) + except queue.Empty: + continue + if len(indices) > 0: + items = [dataset[idx] for idx in indices] + trans_items = transform.apply_batch(items) + else: + # in case of incomplete last batch + trans_items = () + while True: + try: + trans_data_queue.put((batch_idx, trans_items), timeout=1) + break + except queue.Full: + if shutdown_flag.value == 1: + break + logger.debug("batch part queue is full!") + + +def _data_gathering_loop( + trans_data_queues, + batch_queue, + collator, + length, + num_workers, + shutdown_flag, + target_idx, +): + # Gathering the small pieces of batch data into full batch data + while True: + if shutdown_flag.value == 1: + break + + target_batch_idx = target_idx.value + + if target_batch_idx >= length: + break + + full_trans_items = [] + for worker_id in range(num_workers): + while True: + try: + batch_idx, trans_items = trans_data_queues[worker_id].get( + timeout=MP_QUEUE_GET_TIMEOUT + ) + break + except queue.Empty: + if shutdown_flag.value == 1: + break + logger.debug( + "worker:{} data queue get timeout! target batch idx:{}".format( + worker_id, target_batch_idx + ) + ) + if batch_idx != target_batch_idx: + raise RuntimeError( + "Unexperted batch_idx in data gathering loop. worker_id:{}.".format( + worker_id + ) + ) + else: + full_trans_items.extend(trans_items) + + # Merge different parts into a batch. + full_batch = collator.apply(full_trans_items) + + while True: + try: + batch_queue.put(full_batch, timeout=1) + break + except queue.Full: + if shutdown_flag.value == 1: + break + logger.debug("batch queue is full!") + + with target_idx.get_lock(): + target_idx.value += 1 + + batch_queue.disconnect_client() + + +def _data_selecting_loop( + trans_data_queues, + batch_queue, + collator, + length, + num_workers, + shutdown_flag, + target_idx, +): + # Make sure that batch is generated exactly with the same order as generated indices + while True: + if shutdown_flag.value == 1: + break + + target_batch_idx = target_idx.value + + if target_batch_idx >= length: + break + + target_worker_id = target_batch_idx % num_workers + while True: + try: + batch_idx, trans_items = trans_data_queues[target_worker_id].get( + timeout=MP_QUEUE_GET_TIMEOUT + ) + batch_data = collator.apply(trans_items) + break + except queue.Empty: + if shutdown_flag.value == 1: + break + logger.debug( + "worker:{} data queue get timeout! 
target batch idx:{}".format(
+                        target_worker_id, target_batch_idx
+                    )
+                )
+
+        if batch_idx != target_batch_idx:
+            raise RuntimeError(
+                "batch_idx {} mismatch the target_batch_idx {}".format(
+                    batch_idx, target_batch_idx
+                )
+            )
+
+        while True:
+            try:
+                batch_queue.put(batch_data, timeout=1)
+                break
+            except queue.Full:
+                if shutdown_flag.value == 1:
+                    break
+                logger.debug("batch queue is full!")
+
+        with target_idx.get_lock():
+            target_idx.value += 1
+
+    batch_queue.disconnect_client()
diff --git a/imperative/python/megengine/data/dataset/__init__.py b/imperative/python/megengine/data/dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b70d22111ba33a749a8c90491b2db52a700ed44
--- /dev/null
+++ b/imperative/python/megengine/data/dataset/__init__.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from .meta_dataset import ArrayDataset, Dataset, MapDataset, StreamDataset
+from .vision import *
diff --git a/imperative/python/megengine/data/dataset/meta_dataset.py b/imperative/python/megengine/data/dataset/meta_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4415a4274d883ce97a3d9c4102f90323a9c60820
--- /dev/null
+++ b/imperative/python/megengine/data/dataset/meta_dataset.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+
+class Dataset(ABC):
+    r"""
+    An abstract class for all datasets.
+    """
+
+    @abstractmethod
+    def __init__(self):
+        pass
+
+
+class MapDataset(Dataset):
+    r"""
+    An abstract class for map-style data;
+    __getitem__ and __len__ methods are additionally needed.
+    """
+
+    @abstractmethod
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def __getitem__(self, index):
+        pass
+
+    @abstractmethod
+    def __len__(self):
+        pass
+
+
+class StreamDataset(Dataset):
+    r"""
+    An abstract class for stream-style data;
+    an __iter__ method is additionally needed.
+    """
+
+    @abstractmethod
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def __iter__(self):
+        pass
+
+
+class ArrayDataset(MapDataset):
+    def __init__(self, *arrays):
+        r"""
+        ArrayDataset is a dataset for numpy array data; one or more numpy arrays
+        are needed to initiate the dataset, and the dimensions representing the
+        sample number are expected to be the same.
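+
+        A minimal sketch of the intended use (array contents are illustrative):
+
+        .. code-block:: python
+
+            import numpy as np
+
+            data = np.random.random((10, 3, 32, 32)).astype(np.float32)
+            label = np.random.randint(0, 10, 10).astype(np.int32)
+            dataset = ArrayDataset(data, label)
+            image, image_category = dataset[0]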
+ """ + super().__init__() + if not all(len(arrays[0]) == len(array) for array in arrays): + raise ValueError("lengths of input arrays are inconsistent") + self.arrays = arrays + + def __getitem__(self, index: int) -> Tuple: + return tuple(array[index] for array in self.arrays) + + def __len__(self) -> int: + return len(self.arrays[0]) diff --git a/imperative/python/megengine/data/dataset/vision/__init__.py b/imperative/python/megengine/data/dataset/vision/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dd2b0fc302dac854b8880a7894a090ddd3a18f08 --- /dev/null +++ b/imperative/python/megengine/data/dataset/vision/__init__.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .cifar import CIFAR10, CIFAR100 +from .cityscapes import Cityscapes +from .coco import COCO +from .folder import ImageFolder +from .imagenet import ImageNet +from .meta_vision import VisionDataset +from .mnist import MNIST +from .objects365 import Objects365 +from .voc import PascalVOC diff --git a/imperative/python/megengine/data/dataset/vision/cifar.py b/imperative/python/megengine/data/dataset/vision/cifar.py new file mode 100644 index 0000000000000000000000000000000000000000..9ce73688969d707c48245a83dce30759c33bc561 --- /dev/null +++ b/imperative/python/megengine/data/dataset/vision/cifar.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+import os +import pickle +import tarfile +from typing import Tuple + +import numpy as np + +from ....logger import get_logger +from .meta_vision import VisionDataset +from .utils import _default_dataset_root, load_raw_data_from_url + +logger = get_logger(__name__) + + +class CIFAR10(VisionDataset): + r""" ``Dataset`` for CIFAR10 meta data + """ + + url_path = "http://www.cs.utoronto.ca/~kriz/" + raw_file_name = "cifar-10-python.tar.gz" + raw_file_md5 = "c58f30108f718f92721af3b95e74349a" + raw_file_dir = "cifar-10-batches-py" + train_batch = [ + "data_batch_1", + "data_batch_2", + "data_batch_3", + "data_batch_4", + "data_batch_5", + ] + test_batch = ["test_batch"] + meta_info = {"name": "batches.meta"} + + def __init__( + self, + root: str = None, + train: bool = True, + download: bool = True, + timeout: int = 500, + ): + super().__init__(root, order=("image", "image_category")) + + self.timeout = timeout + + # process the root path + if root is None: + self.root = self._default_root + if not os.path.exists(self.root): + os.makedirs(self.root) + else: + self.root = root + if not os.path.exists(self.root): + if download: + logger.debug( + "dir %s does not exist, will be automatically created", + self.root, + ) + os.makedirs(self.root) + else: + raise ValueError("dir %s does not exist" % self.root) + + self.target_file = os.path.join(self.root, self.raw_file_dir) + + # check existence of target pickle dir, if exists load the + # pickle file no matter what download is set + if os.path.exists(self.target_file): + if train: + self.arrays = self.bytes2array(self.train_batch) + else: + self.arrays = self.bytes2array(self.test_batch) + else: + if download: + self.download() + if train: + self.arrays = self.bytes2array(self.train_batch) + else: + self.arrays = self.bytes2array(self.test_batch) + else: + raise ValueError( + "dir does not contain target file %s, please set download=True" + % (self.target_file) + ) + + def __getitem__(self, index: int) -> Tuple: + return tuple(array[index] for array in self.arrays) + + def __len__(self) -> int: + return len(self.arrays[0]) + + @property + def _default_root(self): + return os.path.join(_default_dataset_root(), self.__class__.__name__) + + @property + def meta(self): + meta_path = os.path.join(self.root, self.raw_file_dir, self.meta_info["name"]) + with open(meta_path, "rb") as f: + meta = pickle.load(f, encoding="bytes") + return meta + + def download(self): + url = self.url_path + self.raw_file_name + load_raw_data_from_url( + url, self.raw_file_name, self.raw_file_md5, self.root, self.timeout + ) + self.process() + + def untar(self, file_path, dirs): + assert file_path.endswith(".tar.gz") + logger.debug("untar file %s to %s", file_path, dirs) + t = tarfile.open(file_path) + t.extractall(path=dirs) + + def bytes2array(self, filenames): + data = [] + label = [] + for filename in filenames: + path = os.path.join(self.root, self.raw_file_dir, filename) + logger.debug("unpickle file %s", path) + with open(path, "rb") as fo: + dic = pickle.load(fo, encoding="bytes") + batch_data = dic[b"data"].reshape(-1, 3, 32, 32).transpose((0, 2, 3, 1)) + data.extend(list(batch_data[..., [2, 1, 0]])) + label.extend(dic[b"labels"]) + label = np.array(label, dtype=np.int32) + return (data, label) + + def process(self): + logger.info("process raw data ...") + self.untar(os.path.join(self.root, self.raw_file_name), self.root) + + +class CIFAR100(CIFAR10): + url_path = "http://www.cs.utoronto.ca/~kriz/" + raw_file_name = "cifar-100-python.tar.gz" + raw_file_md5 = 
"eb9058c3a382ffc7106e4002c42a8d85" + raw_file_dir = "cifar-100-python" + train_batch = ["train"] + test_batch = ["test"] + meta_info = {"name": "meta"} + + @property + def meta(self): + meta_path = os.path.join(self.root, self.raw_file_dir, self.meta_info["name"]) + with open(meta_path, "rb") as f: + meta = pickle.load(f, encoding="bytes") + return meta + + def bytes2array(self, filenames): + data = [] + fine_label = [] + coarse_label = [] + for filename in filenames: + path = os.path.join(self.root, self.raw_file_dir, filename) + logger.debug("unpickle file %s", path) + with open(path, "rb") as fo: + dic = pickle.load(fo, encoding="bytes") + batch_data = dic[b"data"].reshape(-1, 3, 32, 32).transpose((0, 2, 3, 1)) + data.extend(list(batch_data[..., [2, 1, 0]])) + fine_label.extend(dic[b"fine_labels"]) + coarse_label.extend(dic[b"coarse_labels"]) + fine_label = np.array(fine_label, dtype=np.int32) + coarse_label = np.array(coarse_label, dtype=np.int32) + return data, fine_label, coarse_label diff --git a/imperative/python/megengine/data/dataset/vision/cityscapes.py b/imperative/python/megengine/data/dataset/vision/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..aa05ac92f5d814a5b936cf4bbe7fffaccedbc838 --- /dev/null +++ b/imperative/python/megengine/data/dataset/vision/cityscapes.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# --------------------------------------------------------------------- +# Part of the following code in this file refs to torchvision +# BSD 3-Clause License +# +# Copyright (c) Soumith Chintala 2016, +# All rights reserved. +# --------------------------------------------------------------------- +import json +import os + +import cv2 +import numpy as np + +from .meta_vision import VisionDataset + + +class Cityscapes(VisionDataset): + r"""`Cityscapes `_ Dataset. 
+ """ + + supported_order = ( + "image", + "mask", + "info", + ) + + def __init__(self, root, image_set, mode, *, order=None): + super().__init__(root, order=order, supported_order=self.supported_order) + + city_root = self.root + if not os.path.isdir(city_root): + raise RuntimeError("Dataset not found or corrupted.") + + self.mode = mode + self.images_dir = os.path.join(city_root, "leftImg8bit", image_set) + self.masks_dir = os.path.join(city_root, self.mode, image_set) + self.images, self.masks = [], [] + # self.target_type = ["instance", "semantic", "polygon", "color"] + + # for semantic segmentation + if mode == "gtFine": + valid_modes = ("train", "test", "val") + else: + valid_modes = ("train", "train_extra", "val") + + for city in os.listdir(self.images_dir): + img_dir = os.path.join(self.images_dir, city) + mask_dir = os.path.join(self.masks_dir, city) + for file_name in os.listdir(img_dir): + mask_name = "{}_{}".format( + file_name.split("_leftImg8bit")[0], + self._get_target_suffix(self.mode, "semantic"), + ) + self.images.append(os.path.join(img_dir, file_name)) + self.masks.append(os.path.join(mask_dir, mask_name)) + + def __getitem__(self, index): + target = [] + for k in self.order: + if k == "image": + image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) + target.append(image) + elif k == "mask": + mask = cv2.imread(self.masks[index], cv2.IMREAD_GRAYSCALE) + mask = self._trans_mask(mask) + mask = mask[:, :, np.newaxis] + target.append(mask) + elif k == "info": + if image is None: + image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) + info = [image.shape[0], image.shape[1], self.images[index]] + target.append(info) + else: + raise NotImplementedError + + return tuple(target) + + def __len__(self): + return len(self.images) + + def _trans_mask(self, mask): + trans_labels = [ + 7, + 8, + 11, + 12, + 13, + 17, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 31, + 32, + 33, + ] + label = np.ones(mask.shape) * 255 + for i, tl in enumerate(trans_labels): + label[mask == tl] = i + return label.astype(np.uint8) + + def _get_target_suffix(self, mode, target_type): + if target_type == "instance": + return "{}_instanceIds.png".format(mode) + elif target_type == "semantic": + return "{}_labelIds.png".format(mode) + elif target_type == "color": + return "{}_color.png".format(mode) + else: + return "{}_polygons.json".format(mode) + + def _load_json(self, path): + with open(path, "r") as file: + data = json.load(file) + return data + + class_names = ( + "road", + "sidewalk", + "building", + "wall", + "fence", + "pole", + "traffic light", + "traffic sign", + "vegetation", + "terrain", + "sky", + "person", + "rider", + "car", + "truck", + "bus", + "train", + "motorcycle", + "bicycle", + ) diff --git a/imperative/python/megengine/data/dataset/vision/coco.py b/imperative/python/megengine/data/dataset/vision/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..d247e52b4f6567d03dd390864ef5b9c1ee4f600c --- /dev/null +++ b/imperative/python/megengine/data/dataset/vision/coco.py @@ -0,0 +1,366 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# --------------------------------------------------------------------- +# Part of the following code in this file refs to maskrcnn-benchmark +# MIT License +# +# Copyright (c) 2018 Facebook +# --------------------------------------------------------------------- +import json +import os +from collections import defaultdict + +import cv2 +import numpy as np + +from .meta_vision import VisionDataset + +min_keypoints_per_image = 10 + + +def _count_visible_keypoints(anno): + return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) + + +def has_valid_annotation(anno, order): + # if it"s empty, there is no annotation + if len(anno) == 0: + return False + if "boxes" in order or "boxes_category" in order: + if "bbox" not in anno[0]: + return False + if "keypoints" in order: + if "keypoints" not in anno[0]: + return False + # for keypoint detection tasks, only consider valid images those + # containing at least min_keypoints_per_image + if _count_visible_keypoints(anno) < min_keypoints_per_image: + return False + return True + + +class COCO(VisionDataset): + r"""`MS COCO `_ Dataset. + """ + + supported_order = ( + "image", + "boxes", + "boxes_category", + "keypoints", + # TODO: need to check + # "polygons", + "info", + ) + + def __init__( + self, root, ann_file, remove_images_without_annotations=False, *, order=None + ): + super().__init__(root, order=order, supported_order=self.supported_order) + + with open(ann_file, "r") as f: + dataset = json.load(f) + + self.imgs = dict() + for img in dataset["images"]: + # for saving memory + if "license" in img: + del img["license"] + if "coco_url" in img: + del img["coco_url"] + if "date_captured" in img: + del img["date_captured"] + if "flickr_url" in img: + del img["flickr_url"] + self.imgs[img["id"]] = img + + self.img_to_anns = defaultdict(list) + for ann in dataset["annotations"]: + # for saving memory + if ( + "boxes" not in self.order + and "boxes_category" not in self.order + and "bbox" in ann + ): + del ann["bbox"] + if "polygons" not in self.order and "segmentation" in ann: + del ann["segmentation"] + self.img_to_anns[ann["image_id"]].append(ann) + + self.cats = dict() + for cat in dataset["categories"]: + self.cats[cat["id"]] = cat + + self.ids = list(sorted(self.imgs.keys())) + + # filter images without detection annotations + if remove_images_without_annotations: + ids = [] + for img_id in self.ids: + anno = self.img_to_anns[img_id] + # filter crowd annotations + anno = [obj for obj in anno if obj["iscrowd"] == 0] + anno = [ + obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0 + ] + if has_valid_annotation(anno, order): + ids.append(img_id) + self.img_to_anns[img_id] = anno + else: + del self.imgs[img_id] + del self.img_to_anns[img_id] + self.ids = ids + + self.json_category_id_to_contiguous_id = { + v: i + 1 for i, v in enumerate(self.cats.keys()) + } + + self.contiguous_category_id_to_json_id = { + v: k for k, v in self.json_category_id_to_contiguous_id.items() + } + + def __getitem__(self, index): + img_id = self.ids[index] + anno = self.img_to_anns[img_id] + + target = [] + for k in self.order: + if k == "image": + file_name = self.imgs[img_id]["file_name"] + path = os.path.join(self.root, file_name) + image = cv2.imread(path, cv2.IMREAD_COLOR) + target.append(image) + elif k == "boxes": + boxes = [obj["bbox"] for obj in anno] + boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) + # transfer boxes from xywh to xyxy + boxes[:, 2:] += boxes[:, :2] + target.append(boxes) + elif k == "boxes_category": + 
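+                # map raw COCO category ids to contiguous ids starting from 1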
boxes_category = [obj["category_id"] for obj in anno] + boxes_category = [ + self.json_category_id_to_contiguous_id[c] for c in boxes_category + ] + boxes_category = np.array(boxes_category, dtype=np.int32) + target.append(boxes_category) + elif k == "keypoints": + keypoints = [obj["keypoints"] for obj in anno] + keypoints = np.array(keypoints, dtype=np.float32).reshape( + -1, len(self.keypoint_names), 3 + ) + target.append(keypoints) + elif k == "polygons": + polygons = [obj["segmentation"] for obj in anno] + polygons = [ + [np.array(p, dtype=np.float32).reshape(-1, 2) for p in ps] + for ps in polygons + ] + target.append(polygons) + elif k == "info": + info = self.imgs[img_id] + info = [info["height"], info["width"], info["file_name"]] + target.append(info) + else: + raise NotImplementedError + + return tuple(target) + + def __len__(self): + return len(self.ids) + + def get_img_info(self, index): + img_id = self.ids[index] + img_info = self.imgs[img_id] + return img_info + + class_names = ( + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", + ) + + classes_originID = { + "person": 1, + "bicycle": 2, + "car": 3, + "motorcycle": 4, + "airplane": 5, + "bus": 6, + "train": 7, + "truck": 8, + "boat": 9, + "traffic light": 10, + "fire hydrant": 11, + "stop sign": 13, + "parking meter": 14, + "bench": 15, + "bird": 16, + "cat": 17, + "dog": 18, + "horse": 19, + "sheep": 20, + "cow": 21, + "elephant": 22, + "bear": 23, + "zebra": 24, + "giraffe": 25, + "backpack": 27, + "umbrella": 28, + "handbag": 31, + "tie": 32, + "suitcase": 33, + "frisbee": 34, + "skis": 35, + "snowboard": 36, + "sports ball": 37, + "kite": 38, + "baseball bat": 39, + "baseball glove": 40, + "skateboard": 41, + "surfboard": 42, + "tennis racket": 43, + "bottle": 44, + "wine glass": 46, + "cup": 47, + "fork": 48, + "knife": 49, + "spoon": 50, + "bowl": 51, + "banana": 52, + "apple": 53, + "sandwich": 54, + "orange": 55, + "broccoli": 56, + "carrot": 57, + "hot dog": 58, + "pizza": 59, + "donut": 60, + "cake": 61, + "chair": 62, + "couch": 63, + "potted plant": 64, + "bed": 65, + "dining table": 67, + "toilet": 70, + "tv": 72, + "laptop": 73, + "mouse": 74, + "remote": 75, + "keyboard": 76, + "cell phone": 77, + "microwave": 78, + "oven": 79, + "toaster": 80, + "sink": 81, + "refrigerator": 82, + "book": 84, + "clock": 85, + "vase": 86, + "scissors": 87, + "teddy bear": 88, + "hair drier": 89, + "toothbrush": 90, + } + + keypoint_names = ( + "nose", + "left_eye", + "right_eye", + "left_ear", + "right_ear", + "left_shoulder", + "right_shoulder", + "left_elbow", + 
"right_elbow", + "left_wrist", + "right_wrist", + "left_hip", + "right_hip", + "left_knee", + "right_knee", + "left_ankle", + "right_ankle", + ) diff --git a/imperative/python/megengine/data/dataset/vision/folder.py b/imperative/python/megengine/data/dataset/vision/folder.py new file mode 100644 index 0000000000000000000000000000000000000000..7124ef56e050c83cb521ef11955bc69e6a3e42a0 --- /dev/null +++ b/imperative/python/megengine/data/dataset/vision/folder.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License + +# Copyright (c) Soumith Chintala 2016, +# All rights reserved. +# --------------------------------------------------------------------- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# +# This file has been modified by Megvii ("Megvii Modifications"). +# All Megvii Modifications are Copyright (C) 2014-2020 Megvii Inc. All rights reserved. +# --------------------------------------------------------------------- +import os +from typing import Dict, List, Tuple + +import cv2 +import numpy as np + +from .meta_vision import VisionDataset +from .utils import is_img + + +class ImageFolder(VisionDataset): + def __init__(self, root: str, check_valid_func=None, class_name: bool = False): + r""" + ImageFolder is a class for loading image data and labels from a organized folder. + + the folder is expected to be organized as followed + root/cls/xxx.img_ext + + labels are indices of sorted classes in the root directory + + :param root: root directory of an image folder + :param loader: a function used to load image from path, + if ``None``, default function that loads + images with PILwill be called + :param check_valid_func: a function used to check if files in folder are + expected image files, if ``None``, default function + that checks file extensions will be called + :param class_name: if ``True``, return class name instead of class index + + """ + super().__init__(root, order=("image", "image_category")) + + self.root = root + + if check_valid_func is not None: + self.check_valid = check_valid_func + else: + self.check_valid = is_img + + self.class_name = class_name + + self.class_dict = self.collect_class() + self.samples = self.collect_samples() + + def collect_samples(self) -> List: + samples = [] + directory = os.path.expanduser(self.root) + for key in sorted(self.class_dict.keys()): + d = os.path.join(directory, key) + if not os.path.isdir(d): + continue + for r, _, filename in sorted(os.walk(d, followlinks=True)): + for name in sorted(filename): + path = os.path.join(r, name) + if self.check_valid(path): + if self.class_name: + samples.append((path, key)) + else: + samples.append((path, self.class_dict[key])) + return samples + + def collect_class(self) -> Dict: + classes = [d.name for d in os.scandir(self.root) if d.is_dir()] + classes.sort() + return {classes[i]: np.int32(i) for i in range(len(classes))} + + def __getitem__(self, index: int) -> Tuple: + path, label = self.samples[index] + img = cv2.imread(path, cv2.IMREAD_COLOR) + return img, label + + def __len__(self): + return len(self.samples) diff --git a/imperative/python/megengine/data/dataset/vision/imagenet.py b/imperative/python/megengine/data/dataset/vision/imagenet.py new file mode 100644 index 
diff --git a/imperative/python/megengine/data/dataset/vision/imagenet.py b/imperative/python/megengine/data/dataset/vision/imagenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..94c2396cf7497a95236b5f0ef0fa66d8c5a5e4dd
--- /dev/null
+++ b/imperative/python/megengine/data/dataset/vision/imagenet.py
@@ -0,0 +1,248 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) Soumith Chintala 2016,
+# All rights reserved.
+# ---------------------------------------------------------------------
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#
+# This file has been modified by Megvii ("Megvii Modifications").
+# All Megvii Modifications are Copyright (C) 2014-2020 Megvii Inc. All rights reserved.
+# ---------------------------------------------------------------------
+import os
+import shutil
+
+from tqdm import tqdm
+
+from ....distributed.group import is_distributed
+from ....logger import get_logger
+from ....serialization import load, save
+from .folder import ImageFolder
+from .utils import _default_dataset_root, calculate_md5, untar, untargz
+
+logger = get_logger(__name__)
+
+
+class ImageNet(ImageFolder):
+    r"""
+    Load ImageNet from raw tar files or from an already prepared folder. The expected layout is
+
+    .. code-block:: bash
+
+        ${root}/
+        |           [REQUIRED TAR FILES]
+        |-  ILSVRC2012_img_train.tar
+        |-  ILSVRC2012_img_val.tar
+        |-  ILSVRC2012_devkit_t12.tar.gz
+        |           [OPTIONAL IMAGE FOLDERS]
+        |-  train/cls/xxx.${img_ext}
+        |-  val/cls/xxx.${img_ext}
+        |-  ILSVRC2012_devkit_t12/data/meta.mat
+        |-  ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt
+
+    If the image folders don't exist, the raw tar files are required so that they
+    can be extracted and processed.
+    """
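Before the class attributes below, a usage sketch of ``ImageNet`` (the ``root`` path is a placeholder):

```python
from megengine.data.dataset.vision.imagenet import ImageNet

# Reuses ${root}/train or ${root}/val if present; otherwise extracts the tars.
# Extraction is refused in distributed mode, so prepare in a single process first.
train_set = ImageNet(root="/data/imagenet", train=True)
image, label = train_set[0]
```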
+ """ + + raw_file_meta = { + "train": ("ILSVRC2012_img_train.tar", "1d675b47d978889d74fa0da5fadfb00e"), + "val": ("ILSVRC2012_img_val.tar", "29b22e2961454d5413ddabcf34fc5622"), + "devkit": ("ILSVRC2012_devkit_t12.tar.gz", "fa75699e90414af021442c21a62c3abf"), + } # ImageNet raw files + default_train_dir = "train" + default_val_dir = "val" + default_devkit_dir = "ILSVRC2012_devkit_t12" + + def __init__(self, root: str = None, train: bool = True, **kwargs): + r""" + initialization: + + * if ``root`` contains ``self.target_folder`` depent on ``train``: + + * initialize ImageFolder with target_folder + + * else: + + * if all raw files are in ``root``: + + * parse ``self.target_folder`` from raw files + * initialize ImageFolder with ``self.target_folder`` + + * else: + + * raise error + + :param root: root directory of imagenet data, if root is ``None``, used default_dataset_root + :param train: if ``True``, load the train split, otherwise load the validation split + """ + + # process the root path + if root is None: + self.root = self._default_root + else: + self.root = root + + if not os.path.exists(self.root): + raise FileNotFoundError("dir %s does not exist" % self.root) + + self.devkit_dir = os.path.join(self.root, self.default_devkit_dir) + + if not os.path.exists(self.devkit_dir): + logger.warning("devkit directory %s does not exists", self.devkit_dir) + self._prepare_devkit() + + self.train = train + + if train: + self.target_folder = os.path.join(self.root, self.default_train_dir) + else: + self.target_folder = os.path.join(self.root, self.default_val_dir) + + if not os.path.exists(self.target_folder): + logger.warning( + "expected image folder %s does not exist, try to load from raw file", + self.target_folder, + ) + if not self.check_raw_file(): + raise FileNotFoundError( + "expected image folder %s does not exist, and raw files do not exist in %s" + % (self.target_folder, self.root) + ) + elif is_distributed(): + raise RuntimeError( + "extracting raw file shouldn't be done in distributed mode, use single process instead" + ) + elif train: + self._prepare_train() + else: + self._prepare_val() + + super().__init__(self.target_folder, **kwargs) + + @property + def _default_root(self): + return os.path.join(_default_dataset_root(), self.__class__.__name__) + + @property + def valid_ground_truth(self): + groud_truth_path = os.path.join( + self.devkit_dir, "data", "ILSVRC2012_validation_ground_truth.txt" + ) + if os.path.exists(groud_truth_path): + with open(groud_truth_path, "r") as f: + val_labels = f.readlines() + return [int(val_label) for val_label in val_labels] + else: + raise FileNotFoundError( + "valid ground truth file %s does not exist" % groud_truth_path + ) + + @property + def meta(self): + try: + return load(os.path.join(self.devkit_dir, "meta.pkl")) + except FileNotFoundError: + import scipy.io + + meta_path = os.path.join(self.devkit_dir, "data", "meta.mat") + if not os.path.exists(meta_path): + raise FileNotFoundError("meta file %s does not exist" % meta_path) + meta = scipy.io.loadmat(meta_path, squeeze_me=True)["synsets"] + nums_children = list(zip(*meta))[4] + meta = [ + meta[idx] + for idx, num_children in enumerate(nums_children) + if num_children == 0 + ] + idcs, wnids, classes = list(zip(*meta))[:3] + classes = [tuple(clss.split(", ")) for clss in classes] + idx_to_wnid = dict(zip(idcs, wnids)) + wnid_to_classes = dict(zip(wnids, classes)) + logger.info( + "saving cached meta file to %s", + os.path.join(self.devkit_dir, "meta.pkl"), + ) + save( + (idx_to_wnid, 
+
+    def check_raw_file(self) -> bool:
+        return all(
+            [
+                os.path.exists(os.path.join(self.root, value[0]))
+                for _, value in self.raw_file_meta.items()
+            ]
+        )
+
+    def _organize_val_data(self):
+        id2wnid = self.meta[0]
+        val_idcs = self.valid_ground_truth
+        val_wnids = [id2wnid[idx] for idx in val_idcs]
+
+        val_images = sorted(
+            [
+                os.path.join(self.target_folder, image)
+                for image in os.listdir(self.target_folder)
+            ]
+        )
+
+        logger.debug("mkdir for val set wnids")
+        for wnid in set(val_wnids):
+            os.makedirs(os.path.join(self.root, self.default_val_dir, wnid))
+
+        logger.debug("mv val images into wnids dir")
+        for wnid, img_file in tqdm(zip(val_wnids, val_images)):
+            shutil.move(
+                img_file,
+                os.path.join(
+                    self.root, self.default_val_dir, wnid, os.path.basename(img_file)
+                ),
+            )
+
+    def _prepare_val(self):
+        assert not self.train
+        raw_filename, checksum = self.raw_file_meta["val"]
+        raw_file = os.path.join(self.root, raw_filename)
+        logger.info("checksum val tar file %s ...", raw_file)
+        assert (
+            calculate_md5(raw_file) == checksum
+        ), "checksum mismatch, {} may be damaged".format(raw_file)
+        logger.info("extract val tar file... this may take 10-20 minutes")
+        untar(raw_file, self.target_folder)
+        self._organize_val_data()
+
+    def _prepare_train(self):
+        assert self.train
+        raw_filename, checksum = self.raw_file_meta["train"]
+        raw_file = os.path.join(self.root, raw_filename)
+        logger.info("checksum train tar file %s ...", raw_file)
+        assert (
+            calculate_md5(raw_file) == checksum
+        ), "checksum mismatch, {} may be damaged".format(raw_file)
+        logger.info("extract train tar file... this may take several hours")
+        untar(raw_file, self.target_folder)
+        paths = [
+            os.path.join(self.target_folder, child_dir)
+            for child_dir in os.listdir(self.target_folder)
+        ]
+        for path in tqdm(paths):
+            untar(path, os.path.splitext(path)[0], remove=True)
+
+    def _prepare_devkit(self):
+        raw_filename, checksum = self.raw_file_meta["devkit"]
+        raw_file = os.path.join(self.root, raw_filename)
+        logger.info("checksum devkit tar file %s ...", raw_file)
+        assert (
+            calculate_md5(raw_file) == checksum
+        ), "checksum mismatch, {} may be damaged".format(raw_file)
+        logger.info("extract devkit file...")
+        untargz(raw_file)
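``_organize_val_data`` above depends on the flat val images sorting into the same order as the ground-truth labels; reduced to its core, the move step looks like this toy sketch (not part of the patch):

```python
import os
import shutil

def organize_val(val_dir, val_wnids):
    # Toy version of the step above: the i-th sorted image belongs to val_wnids[i].
    images = sorted(os.listdir(val_dir))
    assert len(images) == len(val_wnids)
    for wnid, name in zip(val_wnids, images):
        os.makedirs(os.path.join(val_dir, wnid), exist_ok=True)
        shutil.move(os.path.join(val_dir, name), os.path.join(val_dir, wnid, name))
```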
diff --git a/imperative/python/megengine/data/dataset/vision/meta_vision.py b/imperative/python/megengine/data/dataset/vision/meta_vision.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d03d3eda5451a05039f513034f32444004db218
--- /dev/null
+++ b/imperative/python/megengine/data/dataset/vision/meta_vision.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections.abc
+import os
+
+from ..meta_dataset import MapDataset
+
+
+class VisionDataset(MapDataset):
+    _repr_indent = 4
+
+    def __init__(self, root, *, order=None, supported_order=None):
+        if isinstance(root, (str, bytes)):
+            root = os.path.expanduser(root)
+        self.root = root
+
+        if order is None:
+            order = ("image",)
+        if not isinstance(order, collections.abc.Sequence):
+            raise ValueError(
+                "order should be a sequence, but got order={}".format(order)
+            )
+
+        if supported_order is not None:
+            assert isinstance(supported_order, collections.abc.Sequence)
+            for k in order:
+                if k not in supported_order:
+                    raise NotImplementedError("{} is unsupported data type".format(k))
+        self.order = order
+
+    def __getitem__(self, index):
+        raise NotImplementedError
+
+    def __len__(self):
+        raise NotImplementedError
diff --git a/imperative/python/megengine/data/dataset/vision/mnist.py b/imperative/python/megengine/data/dataset/vision/mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e89a3140556bf9449f4fdadf1bb6e6b73b1f6ad
--- /dev/null
+++ b/imperative/python/megengine/data/dataset/vision/mnist.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import gzip
+import os
+import struct
+from typing import Tuple
+
+import numpy as np
+from tqdm import tqdm
+
+from ....logger import get_logger
+from .meta_vision import VisionDataset
+from .utils import _default_dataset_root, load_raw_data_from_url
+
+logger = get_logger(__name__)
+
+
+class MNIST(VisionDataset):
+    r""" ``Dataset`` for MNIST meta data
+    """
+
+    url_path = "http://yann.lecun.com/exdb/mnist/"
+    """
+    url prefix for downloading raw files
+    """
+    raw_file_name = [
+        "train-images-idx3-ubyte.gz",
+        "train-labels-idx1-ubyte.gz",
+        "t10k-images-idx3-ubyte.gz",
+        "t10k-labels-idx1-ubyte.gz",
+    ]
+    """
+    raw file names of both the training set and the test set (10k)
+    """
+    raw_file_md5 = [
+        "f68b3c2dcbeaaa9fbdd348bbdeb94873",
+        "d53e105ee54ea40749a09fcbcd1e9432",
+        "9fb629c4189551a2d022fa330f9573f3",
+        "ec29112dd5afa0611ce80d1b7f02629c",
+    ]
+    """
+    md5 checksums for verifying raw files
+    """
+
+    def __init__(
+        self,
+        root: str = None,
+        train: bool = True,
+        download: bool = True,
+        timeout: int = 500,
+    ):
+        r"""
+        :param root: path for mnist dataset downloading or loading, if ``None``,
+            set ``root`` to the ``_default_root``
+        :param train: if ``True``, load the training set, otherwise load the test set
+        :param download: if raw files do not exist and ``download`` is ``True``,
+            download raw files and process them, otherwise raise ValueError; default is ``True``
+        :param timeout: HTTP read timeout (in seconds) used when downloading raw files
+
+        """
+        super().__init__(root, order=("image", "image_category"))
+
+        self.timeout = timeout
+
+        # process the root path
+        if root is None:
+            self.root = self._default_root
+            if not os.path.exists(self.root):
+                os.makedirs(self.root)
+        else:
+            self.root = root
+            if not os.path.exists(self.root):
+                if download:
+                    logger.debug(
+                        "dir %s does not exist, will be automatically created",
+                        self.root,
+                    )
+                    os.makedirs(self.root)
+                else:
+                    raise ValueError("dir %s does not exist" % self.root)
+
+        if self._check_raw_files():
+            self.process(train)
+        elif download:
+            self.download()
+            self.process(train)
+        else:
+            raise ValueError(
+                "root
does not contain valid raw files, please set download=True" + ) + + def __getitem__(self, index: int) -> Tuple: + return tuple(array[index] for array in self.arrays) + + def __len__(self) -> int: + return len(self.arrays[0]) + + @property + def _default_root(self): + return os.path.join(_default_dataset_root(), self.__class__.__name__) + + @property + def meta(self): + return self._meta_data + + def _check_raw_files(self): + return all( + [ + os.path.exists(os.path.join(self.root, path)) + for path in self.raw_file_name + ] + ) + + def download(self): + for file_name, md5 in zip(self.raw_file_name, self.raw_file_md5): + url = self.url_path + file_name + load_raw_data_from_url(url, file_name, md5, self.root, self.timeout) + + def process(self, train): + # load raw files and transform them into meta data and datasets Tuple(np.array) + logger.info("process the raw files of %s set...", "train" if train else "test") + if train: + meta_data_images, images = parse_idx3( + os.path.join(self.root, self.raw_file_name[0]) + ) + meta_data_labels, labels = parse_idx1( + os.path.join(self.root, self.raw_file_name[1]) + ) + else: + meta_data_images, images = parse_idx3( + os.path.join(self.root, self.raw_file_name[2]) + ) + meta_data_labels, labels = parse_idx1( + os.path.join(self.root, self.raw_file_name[3]) + ) + + self._meta_data = { + "images": meta_data_images, + "labels": meta_data_labels, + } + self.arrays = (images, labels.astype(np.int32)) + + +def parse_idx3(idx3_file): + # parse idx3 file to meta data and data in numpy array (images) + logger.debug("parse idx3 file %s ...", idx3_file) + assert idx3_file.endswith(".gz") + with gzip.open(idx3_file, "rb") as f: + bin_data = f.read() + + # parse meta data + offset = 0 + fmt_header = ">iiii" + magic, imgs, height, width = struct.unpack_from(fmt_header, bin_data, offset) + meta_data = {"magic": magic, "imgs": imgs, "height": height, "width": width} + + # parse images + image_size = height * width + offset += struct.calcsize(fmt_header) + fmt_image = ">" + str(image_size) + "B" + images = [] + bar = tqdm(total=meta_data["imgs"], ncols=80) + for image in struct.iter_unpack(fmt_image, bin_data[offset:]): + images.append(np.array(image, dtype=np.uint8).reshape((height, width, 1))) + bar.update() + bar.close() + return meta_data, images + + +def parse_idx1(idx1_file): + # parse idx1 file to meta data and data in numpy array (labels) + logger.debug("parse idx1 file %s ...", idx1_file) + assert idx1_file.endswith(".gz") + with gzip.open(idx1_file, "rb") as f: + bin_data = f.read() + + # parse meta data + offset = 0 + fmt_header = ">ii" + magic, imgs = struct.unpack_from(fmt_header, bin_data, offset) + meta_data = {"magic": magic, "imgs": imgs} + + # parse labels + offset += struct.calcsize(fmt_header) + fmt_image = ">B" + labels = np.empty(imgs, dtype=int) + bar = tqdm(total=meta_data["imgs"], ncols=80) + for i, label in enumerate(struct.iter_unpack(fmt_image, bin_data[offset:])): + labels[i] = label[0] + bar.update() + bar.close() + return meta_data, labels diff --git a/imperative/python/megengine/data/dataset/vision/objects365.py b/imperative/python/megengine/data/dataset/vision/objects365.py new file mode 100644 index 0000000000000000000000000000000000000000..7c1481bac99fa2af82fb8d93856b7815024373c9 --- /dev/null +++ b/imperative/python/megengine/data/dataset/vision/objects365.py @@ -0,0 +1,498 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. 
All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# --------------------------------------------------------------------- +# Part of the following code in this file refs to maskrcnn-benchmark +# MIT License +# +# Copyright (c) 2018 Facebook +# --------------------------------------------------------------------- +import json +import os +from collections import defaultdict + +import cv2 +import numpy as np + +from .meta_vision import VisionDataset + + +class Objects365(VisionDataset): + r"""`Objects365 `_ Dataset. + """ + + supported_order = ( + "image", + "boxes", + "boxes_category", + "info", + ) + + def __init__( + self, root, ann_file, remove_images_without_annotations=False, *, order=None + ): + super().__init__(root, order=order, supported_order=self.supported_order) + + with open(ann_file, "r") as f: + dataset = json.load(f) + + self.imgs = dict() + for img in dataset["images"]: + self.imgs[img["id"]] = img + + self.img_to_anns = defaultdict(list) + for ann in dataset["annotations"]: + # for saving memory + if ( + "boxes" not in self.order + and "boxes_category" not in self.order + and "bbox" in ann + ): + del ann["bbox"] + self.img_to_anns[ann["image_id"]].append(ann) + + self.cats = dict() + for cat in dataset["categories"]: + self.cats[cat["id"]] = cat + + self.ids = list(sorted(self.imgs.keys())) + + # filter images without detection annotations + if remove_images_without_annotations: + ids = [] + for img_id in self.ids: + anno = self.img_to_anns[img_id] + # filter crowd annotations + anno = [obj for obj in anno if obj["iscrowd"] == 0] + anno = [ + obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0 + ] + if len(anno) > 0: + ids.append(img_id) + self.img_to_anns[img_id] = anno + else: + del self.imgs[img_id] + del self.img_to_anns[img_id] + self.ids = ids + + self.json_category_id_to_contiguous_id = { + v: i + 1 for i, v in enumerate(self.cats.keys()) + } + + self.contiguous_category_id_to_json_id = { + v: k for k, v in self.json_category_id_to_contiguous_id.items() + } + + def __getitem__(self, index): + img_id = self.ids[index] + anno = self.img_to_anns[img_id] + + target = [] + for k in self.order: + if k == "image": + file_name = self.imgs[img_id]["file_name"] + path = os.path.join(self.root, file_name) + image = cv2.imread(path, cv2.IMREAD_COLOR) + target.append(image) + elif k == "boxes": + boxes = [obj["bbox"] for obj in anno] + boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) + # transfer boxes from xywh to xyxy + boxes[:, 2:] += boxes[:, :2] + target.append(boxes) + elif k == "boxes_category": + boxes_category = [obj["category_id"] for obj in anno] + boxes_category = [ + self.json_category_id_to_contiguous_id[c] for c in boxes_category + ] + boxes_category = np.array(boxes_category, dtype=np.int32) + target.append(boxes_category) + elif k == "info": + info = self.imgs[img_id] + info = [info["height"], info["width"], info["file_name"]] + target.append(info) + else: + raise NotImplementedError + + return tuple(target) + + def __len__(self): + return len(self.ids) + + def get_img_info(self, index): + img_id = self.ids[index] + img_info = self.imgs[img_id] + return img_info + + class_names = ( + "person", + "sneakers", + "chair", + "hat", + "lamp", + "bottle", + "cabinet/shelf", + "cup", + "car", + "glasses", + "picture/frame", + "desk", + "handbag", + "street 
lights", + "book", + "plate", + "helmet", + "leather shoes", + "pillow", + "glove", + "potted plant", + "bracelet", + "flower", + "tv", + "storage box", + "vase", + "bench", + "wine glass", + "boots", + "bowl", + "dining table", + "umbrella", + "boat", + "flag", + "speaker", + "trash bin/can", + "stool", + "backpack", + "couch", + "belt", + "carpet", + "basket", + "towel/napkin", + "slippers", + "barrel/bucket", + "coffee table", + "suv", + "toy", + "tie", + "bed", + "traffic light", + "pen/pencil", + "microphone", + "sandals", + "canned", + "necklace", + "mirror", + "faucet", + "bicycle", + "bread", + "high heels", + "ring", + "van", + "watch", + "sink", + "horse", + "fish", + "apple", + "camera", + "candle", + "teddy bear", + "cake", + "motorcycle", + "wild bird", + "laptop", + "knife", + "traffic sign", + "cell phone", + "paddle", + "truck", + "cow", + "power outlet", + "clock", + "drum", + "fork", + "bus", + "hanger", + "nightstand", + "pot/pan", + "sheep", + "guitar", + "traffic cone", + "tea pot", + "keyboard", + "tripod", + "hockey", + "fan", + "dog", + "spoon", + "blackboard/whiteboard", + "balloon", + "air conditioner", + "cymbal", + "mouse", + "telephone", + "pickup truck", + "orange", + "banana", + "airplane", + "luggage", + "skis", + "soccer", + "trolley", + "oven", + "remote", + "baseball glove", + "paper towel", + "refrigerator", + "train", + "tomato", + "machinery vehicle", + "tent", + "shampoo/shower gel", + "head phone", + "lantern", + "donut", + "cleaning products", + "sailboat", + "tangerine", + "pizza", + "kite", + "computer box", + "elephant", + "toiletries", + "gas stove", + "broccoli", + "toilet", + "stroller", + "shovel", + "baseball bat", + "microwave", + "skateboard", + "surfboard", + "surveillance camera", + "gun", + "life saver", + "cat", + "lemon", + "liquid soap", + "zebra", + "duck", + "sports car", + "giraffe", + "pumpkin", + "piano", + "stop sign", + "radiator", + "converter", + "tissue ", + "carrot", + "washing machine", + "vent", + "cookies", + "cutting/chopping board", + "tennis racket", + "candy", + "skating and skiing shoes", + "scissors", + "folder", + "baseball", + "strawberry", + "bow tie", + "pigeon", + "pepper", + "coffee machine", + "bathtub", + "snowboard", + "suitcase", + "grapes", + "ladder", + "pear", + "american football", + "basketball", + "potato", + "paint brush", + "printer", + "billiards", + "fire hydrant", + "goose", + "projector", + "sausage", + "fire extinguisher", + "extension cord", + "facial mask", + "tennis ball", + "chopsticks", + "electronic stove and gas stove", + "pie", + "frisbee", + "kettle", + "hamburger", + "golf club", + "cucumber", + "clutch", + "blender", + "tong", + "slide", + "hot dog", + "toothbrush", + "facial cleanser", + "mango", + "deer", + "egg", + "violin", + "marker", + "ship", + "chicken", + "onion", + "ice cream", + "tape", + "wheelchair", + "plum", + "bar soap", + "scale", + "watermelon", + "cabbage", + "router/modem", + "golf ball", + "pine apple", + "crane", + "fire truck", + "peach", + "cello", + "notepaper", + "tricycle", + "toaster", + "helicopter", + "green beans", + "brush", + "carriage", + "cigar", + "earphone", + "penguin", + "hurdle", + "swing", + "radio", + "CD", + "parking meter", + "swan", + "garlic", + "french fries", + "horn", + "avocado", + "saxophone", + "trumpet", + "sandwich", + "cue", + "kiwi fruit", + "bear", + "fishing rod", + "cherry", + "tablet", + "green vegetables", + "nuts", + "corn", + "key", + "screwdriver", + "globe", + "broom", + "pliers", + "volleyball", + "hammer", + 
"eggplant", + "trophy", + "dates", + "board eraser", + "rice", + "tape measure/ruler", + "dumbbell", + "hamimelon", + "stapler", + "camel", + "lettuce", + "goldfish", + "meat balls", + "medal", + "toothpaste", + "antelope", + "shrimp", + "rickshaw", + "trombone", + "pomegranate", + "coconut", + "jellyfish", + "mushroom", + "calculator", + "treadmill", + "butterfly", + "egg tart", + "cheese", + "pig", + "pomelo", + "race car", + "rice cooker", + "tuba", + "crosswalk sign", + "papaya", + "hair drier", + "green onion", + "chips", + "dolphin", + "sushi", + "urinal", + "donkey", + "electric drill", + "spring rolls", + "tortoise/turtle", + "parrot", + "flute", + "measuring cup", + "shark", + "steak", + "poker card", + "binoculars", + "llama", + "radish", + "noodles", + "yak", + "mop", + "crab", + "microscope", + "barbell", + "bread/bun", + "baozi", + "lion", + "red cabbage", + "polar bear", + "lighter", + "seal", + "mangosteen", + "comb", + "eraser", + "pitaya", + "scallop", + "pencil case", + "saw", + "table tennis paddle", + "okra", + "starfish", + "eagle", + "monkey", + "durian", + "game board", + "rabbit", + "french horn", + "ambulance", + "asparagus", + "hoverboard", + "pasta", + "target", + "hotair balloon", + "chainsaw", + "lobster", + "iron", + "flashlight", + ) diff --git a/imperative/python/megengine/data/dataset/vision/utils.py b/imperative/python/megengine/data/dataset/vision/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9a028d9ce4120d8ce3cb468db1ba8760e5b7e1c9 --- /dev/null +++ b/imperative/python/megengine/data/dataset/vision/utils.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import hashlib +import os +import tarfile + +from ....distributed.group import is_distributed +from ....logger import get_logger +from ....utils.http_download import download_from_url + +IMG_EXT = (".jpg", ".png", ".jpeg", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp") + +logger = get_logger(__name__) + + +def _default_dataset_root(): + default_dataset_root = os.path.expanduser( + os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "megengine") + ) + + return default_dataset_root + + +def load_raw_data_from_url( + url: str, filename: str, target_md5: str, raw_data_dir: str, timeout: int +): + cached_file = os.path.join(raw_data_dir, filename) + logger.debug( + "load_raw_data_from_url: downloading to or using cached %s ...", cached_file + ) + if not os.path.exists(cached_file): + if is_distributed(): + logger.warning( + "Downloading raw data in DISTRIBUTED mode\n" + " File may be downloaded multiple times. We recommend\n" + " users to download in single process first." 
+        )
+        md5 = download_from_url(url, cached_file, http_read_timeout=timeout)
+    else:
+        md5 = calculate_md5(cached_file)
+    if target_md5 == md5:
+        logger.debug("%s exists with correct md5: %s", filename, target_md5)
+    else:
+        os.remove(cached_file)
+        raise RuntimeError("{} exists but fails to match md5".format(filename))
+
+
+def calculate_md5(filename):
+    m = hashlib.md5()
+    with open(filename, "rb") as f:
+        while True:
+            data = f.read(4096)
+            if not data:
+                break
+            m.update(data)
+    return m.hexdigest()
+
+
+def is_img(filename):
+    return filename.lower().endswith(IMG_EXT)
+
+
+def untar(path, to=None, remove=False):
+    if to is None:
+        to = os.path.dirname(path)
+    with tarfile.open(path, "r") as tar:
+        tar.extractall(path=to)
+
+    if remove:
+        os.remove(path)
+
+
+def untargz(path, to=None, remove=False):
+    if path.endswith(".tar.gz"):
+        if to is None:
+            to = os.path.dirname(path)
+        with tarfile.open(path, "r:gz") as tar:
+            tar.extractall(path=to)
+    else:
+        raise ValueError("path %s does not end with .tar.gz" % path)
+
+    if remove:
+        os.remove(path)
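These helpers compose into the verify-then-extract pattern used by the datasets above; a small sketch with placeholder path and checksum:

```python
from megengine.data.dataset.vision.utils import calculate_md5, untar

archive = "/data/raw/some_archive.tar"         # placeholder path
expected = "d41d8cd98f00b204e9800998ecf8427e"  # placeholder md5
if calculate_md5(archive) != expected:
    raise RuntimeError("checksum mismatch, %s may be damaged" % archive)
untar(archive, to="/data/extracted", remove=False)
```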
diff --git a/imperative/python/megengine/data/dataset/vision/voc.py b/imperative/python/megengine/data/dataset/vision/voc.py
new file mode 100644
index 0000000000000000000000000000000000000000..42bf712dc172176ee040881a84ee8b2ed79a383b
--- /dev/null
+++ b/imperative/python/megengine/data/dataset/vision/voc.py
@@ -0,0 +1,195 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# ---------------------------------------------------------------------
+# Part of the following code in this file refers to torchvision
+# BSD 3-Clause License
+#
+# Copyright (c) Soumith Chintala 2016,
+# All rights reserved.
+# ---------------------------------------------------------------------
+import collections.abc
+import os
+import xml.etree.ElementTree as ET
+
+import cv2
+import numpy as np
+
+from .meta_vision import VisionDataset
+
+
+class PascalVOC(VisionDataset):
+    r"""`Pascal VOC `_ Dataset.
+    """
+
+    supported_order = (
+        "image",
+        "boxes",
+        "boxes_category",
+        "mask",
+        "info",
+    )
+
+    def __init__(self, root, image_set, *, order=None):
+        if order is None:
+            order = ("image",)
+        if ("boxes" in order or "boxes_category" in order) and "mask" in order:
+            raise ValueError(
+                "PascalVOC only supports boxes & boxes_category or mask, not both."
+            )
+
+        super().__init__(root, order=order, supported_order=self.supported_order)
+
+        if not os.path.isdir(self.root):
+            raise RuntimeError("Dataset not found or corrupted.")
+
+        self.image_set = image_set
+        image_dir = os.path.join(self.root, "JPEGImages")
+
+        if "boxes" in order or "boxes_category" in order:
+            annotation_dir = os.path.join(self.root, "Annotations")
+            splitdet_dir = os.path.join(self.root, "ImageSets/Main")
+            split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt")
+            with open(os.path.join(split_f), "r") as f:
+                self.file_names = [x.strip() for x in f.readlines()]
+            self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names]
+            self.annotations = [
+                os.path.join(annotation_dir, x + ".xml") for x in self.file_names
+            ]
+            assert len(self.images) == len(self.annotations)
+        elif "mask" in order:
+            if "aug" in image_set:
+                mask_dir = os.path.join(self.root, "SegmentationClass_aug")
+            else:
+                mask_dir = os.path.join(self.root, "SegmentationClass")
+            splitmask_dir = os.path.join(self.root, "ImageSets/Segmentation")
+            split_f = os.path.join(splitmask_dir, image_set.rstrip("\n") + ".txt")
+            with open(os.path.join(split_f), "r") as f:
+                self.file_names = [x.strip() for x in f.readlines()]
+            self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names]
+            self.masks = [os.path.join(mask_dir, x + ".png") for x in self.file_names]
+            assert len(self.images) == len(self.masks)
+        else:
+            raise NotImplementedError
+
+    def __getitem__(self, index):
+        image = None
+        target = []
+        for k in self.order:
+            if k == "image":
+                image = cv2.imread(self.images[index], cv2.IMREAD_COLOR)
+                target.append(image)
+            elif k == "boxes":
+                anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
+                boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]]
+                # boxes type xyxy
+                boxes = [
+                    (bb["xmin"], bb["ymin"], bb["xmax"], bb["ymax"]) for bb in boxes
+                ]
+                boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
+                target.append(boxes)
+            elif k == "boxes_category":
+                anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
+                boxes_category = [obj["name"] for obj in anno["annotation"]["object"]]
+                boxes_category = [
+                    self.class_names.index(bc) + 1 for bc in boxes_category
+                ]
+                boxes_category = np.array(boxes_category, dtype=np.int32)
+                target.append(boxes_category)
+            elif k == "mask":
+                if "aug" in self.image_set:
+                    mask = cv2.imread(self.masks[index], cv2.IMREAD_GRAYSCALE)
+                else:
+                    mask = cv2.imread(self.masks[index], cv2.IMREAD_COLOR)
+                    mask = self._trans_mask(mask)
+                mask = mask[:, :, np.newaxis]
+                target.append(mask)
+            elif k == "info":
+                if image is None:
+                    image = cv2.imread(self.images[index], cv2.IMREAD_COLOR)
+                info = [image.shape[0], image.shape[1], self.file_names[index]]
+                target.append(info)
+            else:
+                raise NotImplementedError
+
+        return tuple(target)
+
+    def __len__(self):
+        return len(self.images)
+
+    def _trans_mask(self, mask):
+        label = np.ones(mask.shape[:2]) * 255
+        for i in range(len(self.class_colors)):
+            b, g, r = self.class_colors[i]
+            label[
+                (mask[:, :, 0] == b) & (mask[:, :, 1] == g) & (mask[:, :, 2] == r)
+            ] = i
+        return label.astype(np.uint8)
+
+    def parse_voc_xml(self, node):
+        voc_dict = {}
+        children = list(node)
+        if children:
+            def_dic = collections.defaultdict(list)
+            for dc in map(self.parse_voc_xml, children):
+                for ind, v in dc.items():
+                    def_dic[ind].append(v)
+            if node.tag == "annotation":
+                def_dic["object"] = [def_dic["object"]]
+            voc_dict = {
+                node.tag: {
+                    ind: v[0] if len(v) == 1 else v for ind, v
in def_dic.items() + } + } + if node.text: + text = node.text.strip() + if not children: + voc_dict[node.tag] = text + return voc_dict + + class_names = ( + "aeroplane", + "bicycle", + "bird", + "boat", + "bottle", + "bus", + "car", + "cat", + "chair", + "cow", + "diningtable", + "dog", + "horse", + "motorbike", + "person", + "pottedplant", + "sheep", + "sofa", + "train", + "tvmonitor", + ) + class_colors = [ + [0, 0, 128], + [0, 128, 0], + [0, 128, 128], + [128, 0, 0], + [128, 0, 128], + [128, 128, 0], + [128, 128, 128], + [0, 0, 64], + [0, 0, 192], + [0, 128, 64], + [0, 128, 192], + [128, 0, 64], + [128, 0, 192], + [128, 128, 64], + [128, 128, 192], + [0, 64, 0], + [0, 64, 128], + [0, 192, 0], + [0, 192, 128], + [128, 64, 0], + ] diff --git a/imperative/python/megengine/data/sampler.py b/imperative/python/megengine/data/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..dbd5d3a395756a42eef606e16823f771146c9a87 --- /dev/null +++ b/imperative/python/megengine/data/sampler.py @@ -0,0 +1,274 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import collections.abc +import math +from abc import ABC +from typing import Any, Generator, Iterator, List, Union + +import numpy as np + +import megengine.distributed as dist + + +class Sampler(ABC): + def __init__( + self, + dataset, + batch_size=1, + drop_last=False, + num_samples=None, + world_size=None, + rank=None, + seed=None, + ): + r""" + An abstract class for all sampler + + :type dataset: `dataset` + :param dataset: dataset to sample from + :type batch_size: positive integer + :param batch_size: batch size for batch method + :type drop_last: bool + :param drop_last: set ``True`` to drop the last incomplete batch, + if the dataset size is not divisible by the batch size. If ``False`` and + the size of dataset is not divisible by the batch_size, then the last batch will + be smaller. 
(default: ``False``)
+        :type num_samples: positive integer
+        :param num_samples: number of samples assigned to one rank
+        :type world_size: positive integer
+        :param world_size: number of ranks
+        :type rank: non-negative integer within 0 and world_size
+        :param rank: rank id, a non-negative integer within 0 and ``world_size``
+        :type seed: non-negative integer
+        :param seed: seed for random operators
+        """
+        if (
+            not isinstance(batch_size, int)
+            or isinstance(batch_size, bool)
+            or batch_size <= 0
+        ):
+            raise ValueError(
+                "batch_size should be a positive integer value, "
+                "but got batch_size={}".format(batch_size)
+            )
+        if not isinstance(drop_last, bool):
+            raise ValueError(
+                "drop_last should be a boolean value, but got "
+                "drop_last={}".format(drop_last)
+            )
+        if num_samples is not None and (
+            not isinstance(num_samples, int)
+            or isinstance(num_samples, bool)
+            or num_samples <= 0
+        ):
+            raise ValueError(
+                "num_samples should be a positive integer "
+                "value, but got num_samples={}".format(num_samples)
+            )
+
+        self.batch_size = batch_size
+        self.dataset = dataset
+        self.drop_last = drop_last
+
+        if world_size is None:
+            world_size = dist.get_world_size() if dist.is_distributed() else 1
+        self.world_size = world_size
+        if rank is None:
+            rank = dist.get_rank() if dist.is_distributed() else 0
+        self.rank = rank
+
+        if num_samples is None:
+            num_samples = len(self.dataset)
+        self.num_samples = int(math.ceil(num_samples / self.world_size))
+
+        # Make sure seeds are the same at each rank
+        if seed is None and self.world_size > 1:
+            seed = 0
+        self.rng = np.random.RandomState(seed)
+
+    def __iter__(self) -> Union[Generator, Iterator]:
+        return self.batch()
+
+    def __len__(self) -> int:
+        if self.drop_last:
+            return self.num_samples // self.batch_size
+        else:
+            return int(math.ceil(self.num_samples / self.batch_size))
+
+    def sample(self):
+        """
+        return a list that contains all sample indices
+        """
+        raise NotImplementedError
+
+    def scatter(self, indices) -> List:
+        r"""
+        scatter method is used for splitting indices into subsets, each subset
+        will be assigned to a rank. Indices are evenly split by default.
+        If a customized indices assignment method is needed, override this method.
+        """
+        total_size = self.num_samples * self.world_size
+
+        # add extra indices to make it evenly divisible
+        indices += indices[: (total_size - len(indices))]
+        assert len(indices) == total_size
+
+        # subsample
+        indices = indices[self.rank : total_size : self.world_size]
+        assert len(indices) == self.num_samples
+
+        return indices
+
+    def batch(self) -> Iterator[List[Any]]:
+        r"""
+        batch method provides a batch indices generator
+        """
+        indices = list(self.sample())
+
+        # user might pass the world_size parameter without dist,
+        # so dist.is_distributed() should not be used
+        if self.world_size > 1:
+            indices = self.scatter(indices)
+
+        step, length = self.batch_size, len(indices)
+        batch_index = [indices[i : i + step] for i in range(0, length, step)]
+
+        if self.drop_last and len(batch_index[-1]) < self.batch_size:
+            batch_index.pop()
+
+        return iter(batch_index)
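Concretely, ``scatter`` pads the index list so each rank receives ``num_samples`` indices and then takes a strided slice. A toy walk-through of that arithmetic for two ranks:

```python
# Toy sketch of the scatter logic for world_size=2, num_samples=ceil(5/2)=3.
indices = [0, 1, 2, 3, 4]
world_size, num_samples = 2, 3
total = num_samples * world_size             # 6
indices += indices[: total - len(indices)]   # pad -> [0, 1, 2, 3, 4, 0]
rank0 = indices[0:total:world_size]          # [0, 2, 4]
rank1 = indices[1:total:world_size]          # [1, 3, 0]
```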
+
+
+class SequentialSampler(Sampler):
+    def __init__(
+        self,
+        dataset,
+        batch_size=1,
+        drop_last=False,
+        indices=None,
+        world_size=None,
+        rank=None,
+    ):
+        r"""
+        Sample elements sequentially
+        """
+        super().__init__(dataset, batch_size, drop_last, None, world_size, rank)
+        if indices is not None and not isinstance(indices, collections.abc.Sequence):
+            raise ValueError(
+                "indices should be None or a sequence, "
+                "but got indices={}".format(indices)
+            )
+        self.indices = indices
+
+    def sample(self) -> Iterator[Any]:
+        r"""
+        return a generator
+        """
+        if self.indices is None:
+            return iter(range(len(self.dataset)))
+        else:
+            return self.indices
+
+
+class RandomSampler(Sampler):
+    def __init__(
+        self,
+        dataset,
+        batch_size=1,
+        drop_last=False,
+        indices=None,
+        world_size=None,
+        rank=None,
+        seed=None,
+    ):
+        r"""
+        Sample elements randomly without replacement
+        """
+        super().__init__(dataset, batch_size, drop_last, None, world_size, rank, seed)
+        if indices is not None and not isinstance(indices, collections.abc.Sequence):
+            raise ValueError(
+                "indices should be None or a sequence, "
+                "but got indices={}".format(indices)
+            )
+        self.indices = indices
+
+    def sample(self) -> List:
+        if self.indices is None:
+            return self.rng.permutation(len(self.dataset)).tolist()
+        else:
+            return self.rng.permutation(self.indices).tolist()
+
+
+class ReplacementSampler(Sampler):
+    def __init__(
+        self,
+        dataset,
+        batch_size=1,
+        drop_last=False,
+        num_samples=None,
+        weights=None,
+        world_size=None,
+        rank=None,
+        seed=None,
+    ):
+        r"""
+        Sample elements randomly with replacement
+
+        :type weights: List
+        :param weights: weights for sampling indices, it could be unnormalized weights
+        """
+        super().__init__(
+            dataset, batch_size, drop_last, num_samples, world_size, rank, seed
+        )
+        if weights is not None:
+            if not isinstance(weights, collections.abc.Sequence):
+                raise ValueError(
+                    "weights should be None or a sequence, "
+                    "but got weights={}".format(weights)
+                )
+            if len(weights) != len(dataset):
+                raise ValueError(
+                    "len(dataset)={} should be equal to "
+                    "len(weights)={}".format(len(dataset), len(weights))
+                )
+        self.weights = weights
+        if self.weights is not None:
+            self.weights = np.array(weights) / sum(weights)
+
+    def sample(self) -> List:
+        n = len(self.dataset)
+        if self.weights is None:
+            return self.rng.randint(n, size=self.num_samples).tolist()
+        else:
+            # draw index samples according to the normalized weights
+            return self.rng.choice(
+                n, size=self.num_samples, replace=True, p=self.weights
+            ).tolist()
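For orientation, a weighted-sampling sketch built on ``ReplacementSampler`` (the dataset and weights are placeholders; weights are normalized in ``__init__``, and the weighted draw shown relies on the index-based sampling above):

```python
from megengine.data.sampler import ReplacementSampler

data = list(range(10))            # stand-in "dataset"; only __len__ is needed here
weights = [1] * 9 + [10]          # oversample the last element
sampler = ReplacementSampler(data, batch_size=4, weights=weights, seed=0)
for batch in sampler:             # each batch is a list of sampled indices
    print(batch)
    break
```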
+
+
+class Infinite(Sampler):
+    r"""Infinite Sampler wrapper for a basic sampler"""
+
+    def sample(self):
+        raise NotImplementedError("sample method not supported in Infinite")
+
+    def __init__(self, sampler):
+        self.sampler = sampler
+        self.sampler_iter = iter(self.sampler)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        try:
+            index = next(self.sampler_iter)
+        except StopIteration:
+            self.sampler_iter = iter(self.sampler)
+            index = next(self.sampler_iter)
+        return index
+
+    def __len__(self):
+        return np.iinfo(np.int64).max
diff --git a/imperative/python/megengine/data/transform/__init__.py b/imperative/python/megengine/data/transform/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..30424cbc6c31e8c4b9dde9ed10a65b3317b74294
--- /dev/null
+++ b/imperative/python/megengine/data/transform/__init__.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from .meta_transform import PseudoTransform, Transform
+from .vision import *
diff --git a/imperative/python/megengine/data/transform/meta_transform.py b/imperative/python/megengine/data/transform/meta_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7fd4f47a457e16e457ada5f9bc2ac92bb732cdc
--- /dev/null
+++ b/imperative/python/megengine/data/transform/meta_transform.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from abc import ABC, abstractmethod
+from typing import Sequence, Tuple
+
+
+class Transform(ABC):
+    """
+    Rewrite the ``apply`` method in a subclass.
+    """
+
+    def apply_batch(self, inputs: Sequence[Tuple]):
+        return tuple(self.apply(input) for input in inputs)
+
+    @abstractmethod
+    def apply(self, input: Tuple):
+        pass
+
+    def __repr__(self):
+        return self.__class__.__name__
+
+
+class PseudoTransform(Transform):
+    def apply(self, input: Tuple):
+        return input
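As a quick illustration of the ``Transform`` contract defined above, a subclass only needs to implement ``apply``; the transform below is hypothetical and not part of the patch:

```python
from typing import Tuple
from megengine.data.transform.meta_transform import Transform

class AddLabelOffset(Transform):
    """Hypothetical transform: shifts the label in an (image, label) tuple."""
    def __init__(self, offset: int = 1):
        self.offset = offset

    def apply(self, input: Tuple):
        image, label = input
        return image, label + self.offset

print(AddLabelOffset().apply(("img-placeholder", 3)))  # ('img-placeholder', 4)
```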
diff --git a/imperative/python/megengine/data/transform/vision/__init__.py b/imperative/python/megengine/data/transform/vision/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d90c9e989d1bd7515e1419564d58b87c9cce028e
--- /dev/null
+++ b/imperative/python/megengine/data/transform/vision/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from .transform import *
diff --git a/imperative/python/megengine/data/transform/vision/functional.py b/imperative/python/megengine/data/transform/vision/functional.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2f4e512d624529f1c4c473632e3495913e8ee74
--- /dev/null
+++ b/imperative/python/megengine/data/transform/vision/functional.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections.abc
+import functools
+import random
+
+import cv2
+import numpy as np
+
+
+def wrap_keepdims(func):
+    """Wrapper to keep the dimension of input images unchanged"""
+
+    @functools.wraps(func)
+    def wrapper(image, *args, **kwargs):
+        if len(image.shape) != 3:
+            raise ValueError(
+                "image must have 3 dims, but got {} dims".format(len(image.shape))
+            )
+        ret = func(image, *args, **kwargs)
+        if len(ret.shape) == 2:
+            ret = ret[:, :, np.newaxis]
+        return ret
+
+    return wrapper
+
+
+@wrap_keepdims
+def to_gray(image):
+    r"""
+    Change BGR format image's color space to gray
+
+    :param image: Input BGR format image, with (H, W, C) shape
+    :return: Gray format image, with (H, W, C) shape
+    """
+    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+
+@wrap_keepdims
+def to_bgr(image):
+    r"""
+    Change gray format image's color space to BGR
+
+    :param image: input Gray format image, with (H, W, C) shape
+    :return: BGR format image, with (H, W, C) shape
+    """
+    return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+
+
+@wrap_keepdims
+def pad(input, size, value):
+    r"""
+    Pad input data with *value* and given *size*
+
+    :param input: Input data, with (H, W, C) shape
+    :param size: Padding size of input data, it could be an integer or a sequence.
+        If it's an integer, the input data will be padded in four directions.
+        If it's a sequence of two integers, the bottom and right sides
+        of the input data will be padded.
+        If it's a sequence of four integers, the top, bottom, left, and right
+        sides of the input data will be padded with the given sizes.
+    :param value: Padding value of data, could be a sequence of int or float.
+        If it is a float value, the dtype of the image will be cast to float32 as well.
+    :return: Padded image
+    """
+    if isinstance(size, int):
+        size = (size, size, size, size)
+    elif isinstance(size, collections.abc.Sequence) and len(size) == 2:
+        size = (0, size[0], 0, size[1])
+    if np.array(value).dtype == float:
+        input = input.astype(np.float32)
+    return cv2.copyMakeBorder(input, *size, cv2.BORDER_CONSTANT, value=value)
+
+
+@wrap_keepdims
+def flip(image, flipCode):
+    r"""
+    According to flipCode (the type of flip), flip the input image
+
+    :param image: Input image, with (H, W, C) shape
+    :param flipCode: code that indicates the type of flip.
+        1 : Flip horizontally
+        0 : Flip vertically
+        -1 : Flip horizontally and vertically
+    :return: BGR format image, with (H, W, C) shape
+    """
+    return cv2.flip(image, flipCode=flipCode)
+
+
+@wrap_keepdims
+def resize(input, size, interpolation=cv2.INTER_LINEAR):
+    r"""
+    Resize the input data to the given size
+
+    :param input: Input data, could be image or masks, with (H, W, C) shape
+    :param size: Target size of input data, with (height, width) shape.
+    :param interpolation: Interpolation method.
+    :return: Resized data, with (H, W, C) shape
+    """
+    if len(size) != 2:
+        raise ValueError("resize needs (h, w), but got {}".format(size))
+
+    if isinstance(interpolation, collections.abc.Sequence):
+        interpolation = random.choice(interpolation)
+    return cv2.resize(input, size[::-1], interpolation=interpolation)
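A short sketch exercising these helpers together (array contents are arbitrary; shapes follow the (H, W, C) convention documented above):

```python
import numpy as np
from megengine.data.transform.vision import functional as F

img = np.zeros((4, 6, 3), dtype=np.uint8)   # (H, W, C)
padded = F.pad(img, (1, 2), value=0)        # pads bottom by 1 and right by 2
flipped = F.flip(img, flipCode=1)           # horizontal flip
resized = F.resize(img, (8, 12))            # (height, width) target
print(padded.shape, flipped.shape, resized.shape)  # (5, 8, 3) (4, 6, 3) (8, 12, 3)
```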
diff --git a/imperative/python/megengine/data/transform/vision/transform.py b/imperative/python/megengine/data/transform/vision/transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf3834a95507df99388908b5f26c11fb171e180c
--- /dev/null
+++ b/imperative/python/megengine/data/transform/vision/transform.py
@@ -0,0 +1,1025 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections.abc
+import math
+from typing import Sequence, Tuple
+
+import cv2
+import numpy as np
+
+from megengine.data.transform import Transform
+from megengine.data.transform.vision import functional as F
+
+__all__ = [
+    "VisionTransform",
+    "ToMode",
+    "Compose",
+    "TorchTransformCompose",
+    "Pad",
+    "Resize",
+    "ShortestEdgeResize",
+    "RandomResize",
+    "RandomCrop",
+    "RandomResizedCrop",
+    "CenterCrop",
+    "RandomHorizontalFlip",
+    "RandomVerticalFlip",
+    "Normalize",
+    "GaussianNoise",
+    "BrightnessTransform",
+    "SaturationTransform",
+    "ContrastTransform",
+    "HueTransform",
+    "ColorJitter",
+    "Lighting",
+]
+
+
+class VisionTransform(Transform):
+    r"""
+    Base class of all transforms used in computer vision.
+    Calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*()
+    methods. If you want to implement a custom transform method for images,
+    override the _apply_image method in a subclass.
+
+    :param order: Input type order. Input is a tuple that contains different structures,
+        order is used to specify the order of the structures. For example, if your input
+        is (image, boxes) type, then the order should be ("image", "boxes").
+        Currently available strings & data types are described below:
+
+        * "image": input image, with shape of (H, W, C)
+        * "coords": coordinates, with shape of (N, 2)
+        * "boxes": bounding boxes, with shape of (N, 4), "xyxy" format,
+          the 1st "xy" represents the top left point of a box,
+          the 2nd "xy" represents the right bottom point.
+        * "mask": map used for segmentation, with shape of (H, W, 1)
+        * "keypoints": keypoints with shape of (N, K, 3), N for number of instances,
+          and K for number of keypoints in one instance. The first two elements
+          of the last axis are the coordinates of the keypoints and the 3rd element is
+          the label of the keypoints.
+        * "polygons": A sequence that contains numpy arrays; its length is the number
+          of instances. Each numpy array represents the polygon coordinates of one instance.
+        * "category": categories for some data type. For example, "image_category"
+          means the category of the input image and "boxes_category" means the categories of
+          bounding boxes.
+        * "info": information for images such as image shapes and image path.
+
+    You can also customize your own data types, as long as you implement the
+    corresponding _apply_*() methods; otherwise ``NotImplementedError`` will be raised.
+    """
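To make the dispatch concrete, a sketch of a custom ``VisionTransform`` that inverts the image and passes coordinates through unchanged (hypothetical, not part of the patch):

```python
import numpy as np
from megengine.data.transform.vision.transform import VisionTransform

class InvertColor(VisionTransform):
    """Hypothetical transform: inverts image intensities, leaves boxes alone."""
    def _apply_image(self, image):
        return 255 - image

    def _apply_coords(self, coords):  # boxes are routed through _apply_coords
        return coords

sample = (np.zeros((2, 2, 3), dtype=np.uint8), np.array([[0, 0, 1, 1]], np.float32))
out_img, out_boxes = InvertColor(order=("image", "boxes")).apply(sample)
```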
+ """ + + def __init__(self, order=None): + super().__init__() + if order is None: + order = ("image",) + elif not isinstance(order, collections.abc.Sequence): + raise ValueError( + "order should be a sequence, but got order={}".format(order) + ) + for k in order: + if k in ("batch",): + raise ValueError("{} is invalid data type".format(k)) + elif k.endswith("category") or k.endswith("info"): + # when the key is *category or info, we should do nothing + # if the corresponding apply methods are not implemented. + continue + elif self._get_apply(k) is None: + raise NotImplementedError("{} is unsupported data type".format(k)) + self.order = order + + def apply_batch(self, inputs: Sequence[Tuple]): + r"""Apply transform on batch input data""" + return tuple(self.apply(input) for input in inputs) + + def apply(self, input: Tuple): + r"""Apply transform on single input data""" + if not isinstance(input, tuple): + input = (input,) + + output = [] + for i in range(min(len(input), len(self.order))): + apply_func = self._get_apply(self.order[i]) + if apply_func is None: + output.append(input[i]) + else: + output.append(apply_func(input[i])) + if len(input) > len(self.order): + output.extend(input[len(self.order) :]) + + if len(output) == 1: + output = output[0] + else: + output = tuple(output) + return output + + def _get_apply(self, key): + return getattr(self, "_apply_{}".format(key), None) + + def _get_image(self, input: Tuple): + if not isinstance(input, tuple): + input = (input,) + return input[self.order.index("image")] + + def _apply_image(self, image): + raise NotImplementedError + + def _apply_coords(self, coords): + raise NotImplementedError + + def _apply_boxes(self, boxes): + idxs = np.array([(0, 1), (2, 1), (0, 3), (2, 3)]).flatten() + coords = np.asarray(boxes).reshape(-1, 4)[:, idxs].reshape(-1, 2) + coords = self._apply_coords(coords).reshape((-1, 4, 2)) + minxy = coords.min(axis=1) + maxxy = coords.max(axis=1) + trans_boxes = np.concatenate((minxy, maxxy), axis=1) + return trans_boxes + + def _apply_mask(self, mask): + raise NotImplementedError + + def _apply_keypoints(self, keypoints): + coords, visibility = keypoints[..., :2], keypoints[..., 2:] + trans_coords = [self._apply_coords(p) for p in coords] + return np.concatenate((trans_coords, visibility), axis=-1) + + def _apply_polygons(self, polygons): + return [[self._apply_coords(p) for p in instance] for instance in polygons] + + +class ToMode(VisionTransform): + r"""Change input data to a target mode. + For example, most transforms use HWC mode image, + while the Neural Network might use CHW mode input tensor + + :param mode: Output mode of input. Use "CHW" mode by default. + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, mode="CHW", *, order=None): + super().__init__(order) + assert mode in ["CHW"], "unsupported mode: {}".format(mode) + self.mode = mode + + def _apply_image(self, image): + if self.mode == "CHW": + return np.ascontiguousarray(np.rollaxis(image, 2)) + return image + + def _apply_coords(self, coords): + return coords + + def _apply_mask(self, mask): + if self.mode == "CHW": + return np.ascontiguousarray(np.rollaxis(mask, 2)) + return mask + + +class Compose(VisionTransform): + r""" + Composes several transforms together. + + :param transforms: List of :class:`VisionTransform` to compose. + :param batch_compose: Whether use shuffle_indices for batch data or not. + If True, use original input sequence. + Otherwise, the shuffle_indices will be used for transforms. 
+
+
+class Compose(VisionTransform):
+    r"""
+    Composes several transforms together.
+
+    :param transforms: List of :class:`VisionTransform` to compose.
+    :param batch_compose: Whether to apply the transforms along the whole batch.
+        If ``True``, each transform receives the whole batch via ``apply_batch``,
+        in the original transform order; shuffle_indices is not supported in this mode.
+        Otherwise the transforms are applied per sample, and shuffle_indices may be used.
+    :param shuffle_indices: Indices used for random shuffle, starting at 1.
+        For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transforms
+        will be randomly shuffled, and the 2nd and 4th transforms will also be shuffled.
+    :param order: The same with :class:`VisionTransform`
+
+    Example:
+
+    .. testcode::
+
+        from megengine.data.transform import RandomHorizontalFlip, RandomVerticalFlip, CenterCrop, ToMode, Compose
+
+        transform_func = Compose([
+            RandomHorizontalFlip(),
+            RandomVerticalFlip(),
+            CenterCrop(100),
+            ToMode("CHW"),
+            ],
+            shuffle_indices=[(1, 2, 3)]
+        )
+    """
+
+    def __init__(
+        self, transforms=[], batch_compose=False, shuffle_indices=None, *, order=None
+    ):
+        super().__init__(order)
+        self.transforms = transforms
+        self._set_order()
+
+        if batch_compose and shuffle_indices is not None:
+            raise ValueError(
+                "Do not support shuffle when applying transforms along the whole batch"
+            )
+        self.batch_compose = batch_compose
+
+        if shuffle_indices is not None:
+            shuffle_indices = [tuple(x - 1 for x in idx) for idx in shuffle_indices]
+        self.shuffle_indices = shuffle_indices
+
+    def _set_order(self):
+        for t in self.transforms:
+            t.order = self.order
+            if isinstance(t, Compose):
+                t._set_order()
+
+    def apply_batch(self, inputs: Sequence[Tuple]):
+        if self.batch_compose:
+            for t in self.transforms:
+                inputs = t.apply_batch(inputs)
+            return inputs
+        else:
+            return super().apply_batch(inputs)
+
+    def apply(self, input: Tuple):
+        for t in self._shuffle():
+            input = t.apply(input)
+        return input
+
+    def _shuffle(self):
+        if self.shuffle_indices is not None:
+            source_idx = list(range(len(self.transforms)))
+            for idx in self.shuffle_indices:
+                shuffled = np.random.permutation(idx).tolist()
+                for src, dst in zip(idx, shuffled):
+                    source_idx[src] = dst
+            return [self.transforms[i] for i in source_idx]
+        else:
+            return self.transforms
+
+
+class TorchTransformCompose(VisionTransform):
+    r"""
+    Compose class used for transforms in torchvision; it only supports PIL images.
+    Transforms that operate on tensors, such as Normalize and ToTensor in torchvision,
+    are not supported.
+
+    :param transforms: The same with ``Compose``
+    :param order: The same with :class:`VisionTransform`
+    """
+
+    def __init__(self, transforms, *, order=None):
+        super().__init__(order)
+        self.transforms = transforms
+
+    def _apply_image(self, image):
+        from PIL import Image
+
+        try:
+            import accimage
+        except ImportError:
+            accimage = None
+
+        if image.shape[0] == 3:  # CHW
+            image = np.ascontiguousarray(image[[2, 1, 0]])
+        elif image.shape[2] == 3:  # HWC
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(image.astype(np.uint8))
+
+        for t in self.transforms:
+            image = t(image)
+
+        if isinstance(image, Image.Image) or (
+            accimage is not None and isinstance(image, accimage.Image)
+        ):
+            image = np.array(image, dtype=np.uint8)
+        if image.shape[0] == 3:  # CHW
+            image = np.ascontiguousarray(image[[2, 1, 0]])
+        elif image.shape[2] == 3:  # HWC
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        return image
+
+
+class Pad(VisionTransform):
+    r"""Pad the input data.
+
+    :param size: Padding size of the input image, it could be an integer or a sequence.
+        If it's an integer, the input image will be padded in four directions.
+        If it's a sequence of two integers, the bottom and right sides
+        of the image will be padded.
+        If it's a sequence of four integers, the top, bottom, left, and right
+        sides of the image will be padded with the given size.
+ :param value: Padding value of image, could be a sequence of int or float. + if it's float value, the dtype of image will be casted to float32 also. + :param mask_value: Padding value of segmentation map. + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, size=0, value=0, mask_value=0, *, order=None): + super().__init__(order) + if isinstance(size, int): + size = (size, size, size, size) + elif isinstance(size, collections.abc.Sequence) and len(size) == 2: + size = (0, size[0], 0, size[1]) + elif not (isinstance(size, collections.abc.Sequence) and len(size) == 4): + raise ValueError( + "size should be a list/tuple which contains " + "(top, down, left, right) four pad sizes." + ) + self.size = size + self.value = value + if not isinstance(mask_value, int): + raise ValueError( + "mask_value should be a positive integer, " + "but got mask_value={}".format(mask_value) + ) + self.mask_value = mask_value + + def _apply_image(self, image): + return F.pad(image, self.size, self.value) + + def _apply_coords(self, coords): + coords[:, 0] += self.size[2] + coords[:, 1] += self.size[0] + return coords + + def _apply_mask(self, mask): + return F.pad(mask, self.size, self.mask_value) + + +class Resize(VisionTransform): + r"""Resize the input data. + + :param output_size: Target size of image, with (height, width) shape. + :param interpolation: Interpolation method. All methods are listed below: + + * cv2.INTER_NEAREST – a nearest-neighbor interpolation. + * cv2.INTER_LINEAR – a bilinear interpolation (used by default). + * cv2.INTER_AREA – resampling using pixel area relation. + * cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. + * cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None): + super().__init__(order) + self.output_size = output_size + self.interpolation = interpolation + + def apply(self, input: Tuple): + self._shape_info = self._get_shape(self._get_image(input)) + return super().apply(input) + + def _apply_image(self, image): + h, w, th, tw = self._shape_info + if h == th and w == tw: + return image + return F.resize(image, (th, tw), self.interpolation) + + def _apply_coords(self, coords): + h, w, th, tw = self._shape_info + if h == th and w == tw: + return coords + coords[:, 0] = coords[:, 0] * (tw / w) + coords[:, 1] = coords[:, 1] * (th / h) + return coords + + def _apply_mask(self, mask): + h, w, th, tw = self._shape_info + if h == th and w == tw: + return mask + return F.resize(mask, (th, tw), cv2.INTER_NEAREST) + + def _get_shape(self, image): + h, w, _ = image.shape + if isinstance(self.output_size, int): + if min(h, w) == self.output_size: + return h, w, h, w + if h < w: + th = self.output_size + tw = int(self.output_size * w / h) + else: + tw = self.output_size + th = int(self.output_size * h / w) + return h, w, th, tw + else: + return (h, w, *self.output_size) + + +class ShortestEdgeResize(VisionTransform): + def __init__( + self, + min_size, + max_size, + sample_style="range", + interpolation=cv2.INTER_LINEAR, + *, + order=None + ): + super().__init__(order) + if sample_style not in ("range", "choice"): + raise NotImplementedError( + "{} is unsupported sample style".format(sample_style) + ) + self.sample_style = sample_style + if isinstance(min_size, int): + min_size = (min_size, min_size) + self.min_size = min_size + self.max_size = max_size + self.interpolation 
= interpolation
+
+    def apply(self, input: Tuple):
+        self._shape_info = self._get_shape(self._get_image(input))
+        return super().apply(input)
+
+    def _apply_image(self, image):
+        h, w, th, tw = self._shape_info
+        if h == th and w == tw:
+            return image
+        return F.resize(image, (th, tw), self.interpolation)
+
+    def _apply_coords(self, coords):
+        h, w, th, tw = self._shape_info
+        if h == th and w == tw:
+            return coords
+        coords[:, 0] = coords[:, 0] * (tw / w)
+        coords[:, 1] = coords[:, 1] * (th / h)
+        return coords
+
+    def _apply_mask(self, mask):
+        h, w, th, tw = self._shape_info
+        if h == th and w == tw:
+            return mask
+        return F.resize(mask, (th, tw), cv2.INTER_NEAREST)
+
+    def _get_shape(self, image):
+        h, w, _ = image.shape
+        if self.sample_style == "range":
+            size = np.random.randint(self.min_size[0], self.min_size[1] + 1)
+        else:
+            size = np.random.choice(self.min_size)
+
+        scale = size / min(h, w)
+        if h < w:
+            th, tw = size, scale * w
+        else:
+            th, tw = scale * h, size
+        if max(th, tw) > self.max_size:
+            scale = self.max_size / max(th, tw)
+            th = th * scale
+            tw = tw * scale
+        th = int(round(th))
+        tw = int(round(tw))
+        return h, w, th, tw
+
+
+class RandomResize(VisionTransform):
+    r"""Resize the input data randomly.
+
+    :param scale_range: Range (min, max) from which the resize scale factor is
+        sampled uniformly; the sampled factor is applied to both height and width.
+    :param interpolation: Interpolation method, the same with :class:`Resize`.
+    :param order: The same with :class:`VisionTransform`
+    """
+
+    def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None):
+        super().__init__(order)
+        self.scale_range = scale_range
+        self.interpolation = interpolation
+
+    def apply(self, input: Tuple):
+        self._shape_info = self._get_shape(self._get_image(input))
+        return super().apply(input)
+
+    def _apply_image(self, image):
+        h, w, th, tw = self._shape_info
+        if h == th and w == tw:
+            return image
+        return F.resize(image, (th, tw), self.interpolation)
+
+    def _apply_coords(self, coords):
+        h, w, th, tw = self._shape_info
+        if h == th and w == tw:
+            return coords
+        coords[:, 0] = coords[:, 0] * (tw / w)
+        coords[:, 1] = coords[:, 1] * (th / h)
+        return coords
+
+    def _apply_mask(self, mask):
+        h, w, th, tw = self._shape_info
+        if h == th and w == tw:
+            return mask
+        return F.resize(mask, (th, tw), cv2.INTER_NEAREST)
+
+    def _get_shape(self, image):
+        h, w, _ = image.shape
+        scale = np.random.uniform(*self.scale_range)
+        th = int(round(h * scale))
+        tw = int(round(w * scale))
+        return h, w, th, tw
+
+
+class RandomCrop(VisionTransform):
+    r"""Crop the input data randomly. Before applying the crop transform,
+    the image is padded first. If the target size is still bigger than the
+    padded image, the image is further padded to the target size.
+
+    :param output_size: Target size of output image, with (height, width) shape.
+ :param padding_size: The same with `size` in ``Pad`` + :param padding_value: The same with `value` in ``Pad`` + :param order: The same with :class:`VisionTransform` + """ + + def __init__( + self, + output_size, + padding_size=0, + padding_value=[0, 0, 0], + padding_maskvalue=0, + *, + order=None + ): + super().__init__(order) + if isinstance(output_size, int): + self.output_size = (output_size, output_size) + else: + self.output_size = output_size + self.pad = Pad(padding_size, padding_value, order=self.order) + self.padding_value = padding_value + self.padding_maskvalue = padding_maskvalue + + def apply(self, input): + input = self.pad.apply(input) + self._h, self._w, _ = self._get_image(input).shape + self._th, self._tw = self.output_size + self._x = np.random.randint(0, max(0, self._w - self._tw) + 1) + self._y = np.random.randint(0, max(0, self._h - self._th) + 1) + return super().apply(input) + + def _apply_image(self, image): + if self._th > self._h: + image = F.pad(image, (self._th - self._h, 0), self.padding_value) + if self._tw > self._w: + image = F.pad(image, (0, self._tw - self._w), self.padding_value) + return image[self._y : self._y + self._th, self._x : self._x + self._tw] + + def _apply_coords(self, coords): + coords[:, 0] -= self._x + coords[:, 1] -= self._y + return coords + + def _apply_mask(self, mask): + if self._th > self._h: + mask = F.pad(mask, (self._th - self._h, 0), self.padding_maskvalue) + if self._tw > self._w: + mask = F.pad(mask, (0, self._tw - self._w), self.padding_maskvalue) + return mask[self._y : self._y + self._th, self._x : self._x + self._tw] + + +class RandomResizedCrop(VisionTransform): + r"""Crop the input data to random size and aspect ratio. + A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 1.33) of the original aspect ratio is made. + After applying crop transfrom, the input data will be resized to given size. + + :param output_size: Target size of output image, with (height, width) shape. + :param scale_range: Range of size of the origin size cropped. Default: (0.08, 1.0) + :param ratio_range: Range of aspect ratio of the origin aspect ratio cropped. 
Default: (0.75, 1.33) + :param order: The same with :class:`VisionTransform` + """ + + def __init__( + self, + output_size, + scale_range=(0.08, 1.0), + ratio_range=(3.0 / 4, 4.0 / 3), + interpolation=cv2.INTER_LINEAR, + *, + order=None + ): + super().__init__(order) + if isinstance(output_size, int): + self.output_size = (output_size, output_size) + else: + self.output_size = output_size + assert ( + scale_range[0] <= scale_range[1] + ), "scale_range should be of kind (min, max)" + assert ( + ratio_range[0] <= ratio_range[1] + ), "ratio_range should be of kind (min, max)" + self.scale_range = scale_range + self.ratio_range = ratio_range + self.interpolation = interpolation + + def apply(self, input: Tuple): + self._coord_info = self._get_coord(self._get_image(input)) + return super().apply(input) + + def _apply_image(self, image): + x, y, w, h = self._coord_info + cropped_img = image[y : y + h, x : x + w] + return F.resize(cropped_img, self.output_size, self.interpolation) + + def _apply_coords(self, coords): + x, y, w, h = self._coord_info + coords[:, 0] = (coords[:, 0] - x) * self.output_size[1] / w + coords[:, 1] = (coords[:, 1] - y) * self.output_size[0] / h + return coords + + def _apply_mask(self, mask): + x, y, w, h = self._coord_info + cropped_mask = mask[y : y + h, x : x + w] + return F.resize(cropped_mask, self.output_size, cv2.INTER_NEAREST) + + def _get_coord(self, image, attempts=10): + height, width, _ = image.shape + area = height * width + + for _ in range(attempts): + target_area = np.random.uniform(*self.scale_range) * area + log_ratio = tuple(math.log(x) for x in self.ratio_range) + aspect_ratio = math.exp(np.random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if 0 < w <= width and 0 < h <= height: + x = np.random.randint(0, width - w + 1) + y = np.random.randint(0, height - h + 1) + return x, y, w, h + + # Fallback to central crop + in_ratio = float(width) / float(height) + if in_ratio < min(self.ratio_range): + w = width + h = int(round(w / min(self.ratio_range))) + elif in_ratio > max(self.ratio_range): + h = height + w = int(round(h * max(self.ratio_range))) + else: # whole image + w = width + h = height + x = (width - w) // 2 + y = (height - h) // 2 + return x, y, w, h + + +class CenterCrop(VisionTransform): + r"""Crops the given the input data at the center. + + :param output_size: Target size of output image, with (height, width) shape. 
+
+    :param order: The same with :class:`VisionTransform`
+    """
+
+    def __init__(self, output_size, *, order=None):
+        super().__init__(order)
+        if isinstance(output_size, int):
+            self.output_size = (output_size, output_size)
+        else:
+            self.output_size = output_size
+
+    def apply(self, input: Tuple):
+        self._coord_info = self._get_coord(self._get_image(input))
+        return super().apply(input)
+
+    def _apply_image(self, image):
+        x, y = self._coord_info
+        th, tw = self.output_size
+        return image[y : y + th, x : x + tw]
+
+    def _apply_coords(self, coords):
+        x, y = self._coord_info
+        coords[:, 0] -= x
+        coords[:, 1] -= y
+        return coords
+
+    def _apply_mask(self, mask):
+        x, y = self._coord_info
+        th, tw = self.output_size
+        return mask[y : y + th, x : x + tw]
+
+    def _get_coord(self, image):
+        th, tw = self.output_size
+        h, w, _ = image.shape
+        assert th <= h and tw <= w, "output size is bigger than image size"
+        x = int(round((w - tw) / 2.0))
+        y = int(round((h - th) / 2.0))
+        return x, y
+
+
+class RandomHorizontalFlip(VisionTransform):
+    r"""Horizontally flip the input data randomly with a given probability.
+
+    :param prob: probability of the input data being flipped. Default: 0.5
+    :param order: The same with :class:`VisionTransform`
+    """
+
+    def __init__(self, prob: float = 0.5, *, order=None):
+        super().__init__(order)
+        self.prob = prob
+
+    def apply(self, input: Tuple):
+        self._flipped = np.random.random() < self.prob
+        self._w = self._get_image(input).shape[1]
+        return super().apply(input)
+
+    def _apply_image(self, image):
+        if self._flipped:
+            return F.flip(image, flipCode=1)
+        return image
+
+    def _apply_coords(self, coords):
+        if self._flipped:
+            coords[:, 0] = self._w - coords[:, 0]
+        return coords
+
+    def _apply_mask(self, mask):
+        if self._flipped:
+            return F.flip(mask, flipCode=1)
+        return mask
+
+
+class RandomVerticalFlip(VisionTransform):
+    r"""Vertically flip the input data randomly with a given probability.
+
+    :param prob: probability of the input data being flipped. Default: 0.5
+    :param order: The same with :class:`VisionTransform`
+    """
+
+    def __init__(self, prob: float = 0.5, *, order=None):
+        super().__init__(order)
+        self.prob = prob
+
+    def apply(self, input: Tuple):
+        self._flipped = np.random.random() < self.prob
+        self._h = self._get_image(input).shape[0]
+        return super().apply(input)
+
+    def _apply_image(self, image):
+        if self._flipped:
+            return F.flip(image, flipCode=0)
+        return image
+
+    def _apply_coords(self, coords):
+        if self._flipped:
+            coords[:, 1] = self._h - coords[:, 1]
+        return coords
+
+    def _apply_mask(self, mask):
+        if self._flipped:
+            return F.flip(mask, flipCode=0)
+        return mask
+
+
+class Normalize(VisionTransform):
+    r"""Normalize the input data with mean and standard deviation.
+    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels,
+    this transform will normalize each channel of the input data.
+    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+
+    :param mean: Sequence of means for each channel.
+    :param std: Sequence of standard deviations for each channel.
+ :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, mean=0.0, std=1.0, *, order=None): + super().__init__(order) + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + + def _apply_image(self, image): + return (image - self.mean) / self.std + + def _apply_coords(self, coords): + return coords + + def _apply_mask(self, mask): + return mask + + +class GaussianNoise(VisionTransform): + r"""Add random gaussian noise to the input data. + Gaussian noise is generated with given mean and std. + + :param mean: Gaussian mean used to generate noise. + :param std: Gaussian standard deviation used to generate noise. + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, mean=0.0, std=1.0, *, order=None): + super().__init__(order) + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + + def _apply_image(self, image): + dtype = image.dtype + noise = np.random.normal(self.mean, self.std, image.shape) * 255 + image = image + noise.astype(np.float32) + return np.clip(image, 0, 255).astype(dtype) + + def _apply_coords(self, coords): + return coords + + def _apply_mask(self, mask): + return mask + + +class BrightnessTransform(VisionTransform): + r"""Adjust brightness of the input data. + + :param value: How much to adjust the brightness. Can be any + non negative number. 0 gives the original image + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, value, *, order=None): + super().__init__(order) + if value < 0: + raise ValueError("brightness value should be non-negative") + self.value = value + + def _apply_image(self, image): + if self.value == 0: + return image + + dtype = image.dtype + image = image.astype(np.float32) + alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) + image = image * alpha + return image.clip(0, 255).astype(dtype) + + def _apply_coords(self, coords): + return coords + + def _apply_mask(self, mask): + return mask + + +class ContrastTransform(VisionTransform): + r"""Adjust contrast of the input data. + + :param value: How much to adjust the contrast. Can be any + non negative number. 0 gives the original image + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, value, *, order=None): + super().__init__(order) + if value < 0: + raise ValueError("contrast value should be non-negative") + self.value = value + + def _apply_image(self, image): + if self.value == 0: + return image + + dtype = image.dtype + image = image.astype(np.float32) + alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) + image = image * alpha + F.to_gray(image).mean() * (1 - alpha) + return image.clip(0, 255).astype(dtype) + + def _apply_coords(self, coords): + return coords + + def _apply_mask(self, mask): + return mask + + +class SaturationTransform(VisionTransform): + r"""Adjust saturation of the input data. + + :param value: How much to adjust the saturation. Can be any + non negative number. 
0 gives the original image + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, value, *, order=None): + super().__init__(order) + if value < 0: + raise ValueError("saturation value should be non-negative") + self.value = value + + def _apply_image(self, image): + if self.value == 0: + return image + + dtype = image.dtype + image = image.astype(np.float32) + alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) + image = image * alpha + F.to_gray(image) * (1 - alpha) + return image.clip(0, 255).astype(dtype) + + def _apply_coords(self, coords): + return coords + + def _apply_mask(self, mask): + return mask + + +class HueTransform(VisionTransform): + r"""Adjust hue of the input data. + + :param value: How much to adjust the hue. Can be any number + between 0 and 0.5, 0 gives the original image + :param order: The same with :class:`VisionTransform` + """ + + def __init__(self, value, *, order=None): + super().__init__(order) + if value < 0 or value > 0.5: + raise ValueError("hue value should be in [0.0, 0.5]") + self.value = value + + def _apply_image(self, image): + if self.value == 0: + return image + + dtype = image.dtype + image = image.astype(np.uint8) + hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV_FULL) + h, s, v = cv2.split(hsv_image) + + alpha = np.random.uniform(-self.value, self.value) + h = h.astype(np.uint8) + # uint8 addition take cares of rotation across boundaries + with np.errstate(over="ignore"): + h += np.uint8(alpha * 255) + hsv_image = cv2.merge([h, s, v]) + return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR_FULL).astype(dtype) + + def _apply_coords(self, coords): + return coords + + def _apply_mask(self, mask): + return mask + + +class ColorJitter(VisionTransform): + r"""Randomly change the brightness, contrast, saturation and hue of an image. + + :param brightness: How much to jitter brightness. + Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] + or the given [min, max]. Should be non negative numbers. + :param contrast: How much to jitter contrast. + Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] + or the given [min, max]. Should be non negative numbers. + :param saturation: How much to jitter saturation. + Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] + or the given [min, max]. Should be non negative numbers. + :param hue: How much to jitter hue. + Chosen uniformly from [-hue, hue] or the given [min, max]. + Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. 
+
+    :param order: The same with :class:`VisionTransform`
+    """
+
+    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None):
+        super().__init__(order)
+        transforms = []
+        if brightness != 0:
+            transforms.append(BrightnessTransform(brightness))
+        if contrast != 0:
+            transforms.append(ContrastTransform(contrast))
+        if saturation != 0:
+            transforms.append(SaturationTransform(saturation))
+        if hue != 0:
+            transforms.append(HueTransform(hue))
+        self.transforms = Compose(
+            transforms,
+            shuffle_indices=[tuple(range(1, len(transforms) + 1))],
+            order=order,
+        )
+
+    def apply(self, input):
+        return self.transforms.apply(input)
+
+
+class Lighting(VisionTransform):
+    r"""Apply AlexNet-style PCA lighting noise to the input image, using
+    fixed ImageNet eigenvalues and eigenvectors (in BGR channel order).
+
+    :param scale: Standard deviation of the normal distribution used to
+        sample the noise magnitude along each principal component.
+    :param order: The same with :class:`VisionTransform`
+    """
+
+    def __init__(self, scale, *, order=None):
+        super().__init__(order)
+        if scale < 0:
+            raise ValueError("lighting scale should be non-negative")
+        self.scale = scale
+        self.eigvec = np.array(
+            [
+                [-0.5836, -0.6948, 0.4203],
+                [-0.5808, -0.0045, -0.8140],
+                [-0.5675, 0.7192, 0.4009],
+            ]
+        )  # reverse the first dimension for BGR
+        self.eigval = np.array([0.2175, 0.0188, 0.0045])
+
+    def _apply_image(self, image):
+        if self.scale == 0:
+            return image
+
+        dtype = image.dtype
+        image = image.astype(np.float32)
+        alpha = np.random.normal(scale=self.scale, size=3)
+        image = image + self.eigvec.dot(alpha * self.eigval)
+        return image.clip(0, 255).astype(dtype)
+
+    def _apply_coords(self, coords):
+        return coords
+
+    def _apply_mask(self, mask):
+        return mask
diff --git a/imperative/python/megengine/device.py b/imperative/python/megengine/device.py
new file mode 100644
index 0000000000000000000000000000000000000000..008920febb4efd4477dae6cf9773317b12048c69
--- /dev/null
+++ b/imperative/python/megengine/device.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import os
+
+from .core._imperative_rt.common import CompNode, DeviceType
+
+__all__ = [
+    "is_cuda_available",
+    "get_device_count",
+    "get_default_device",
+    "set_default_device",
+]
+
+_default_device = os.getenv("MGE_DEFAULT_DEVICE", "xpux")
+
+
+def _valid_device(inp):
+    if isinstance(inp, str) and len(inp) == 4:
+        if inp[0] in {"x", "c", "g"} and inp[1:3] == "pu":
+            if inp[3] == "x" or inp[3].isdigit():
+                return True
+    return False
+
+
+def _str2device_type(type_str: str, allow_unspec: bool = True):
+    type_str = type_str.upper()
+    if type_str == "CPU":
+        return DeviceType.CPU
+    elif type_str == "GPU" or type_str == "CUDA":
+        return DeviceType.CUDA
+    else:
+        assert allow_unspec and type_str == "XPU", "bad device type"
+        return DeviceType.UNSPEC
+
+
+def get_device_count(device_type: str) -> int:
+    """Gets number of devices installed on this system.
+
+    :param device_type: device type, one of 'gpu' or 'cpu'
+    """
+
+    device_type_set = ("cpu", "gpu")
+    assert device_type in device_type_set, "device must be one of {}".format(
+        device_type_set
+    )
+    device_type = _str2device_type(device_type)
+    return CompNode._get_device_count(device_type, False)
+
+
+def is_cuda_available() -> bool:
+    """Returns whether cuda device is available on this system."""
+    t = _str2device_type("gpu")
+    return CompNode._get_device_count(t, False) > 0
+
+
+def set_default_device(device: str = "xpux"):
+    r"""Sets default computing node.
+
+    :param device: default device type. The type can be 'cpu0', 'cpu1', etc.,
+        or 'gpu0', 'gpu1', etc., to specify the particular cpu or gpu to use.
+        'cpux' and 'gpux' can also be used to specify any number of cpu or gpu devices.
+
+        The 'multithread' device type is available for inference, and implements
+        multi-threading parallelism at the operator level. For example,
+        'multithread4' will compute with 4 threads.
+
+        The default value is 'xpux' to specify any device available.
+
+        It can also be set by the environment variable `MGE_DEFAULT_DEVICE`.
+    """
+    global _default_device  # pylint: disable=global-statement
+    assert _valid_device(device), "Invalid device name {}".format(device)
+    _default_device = device
+
+
+def get_default_device() -> str:
+    r"""Gets default computing node.
+
+    It returns the value set by :func:`~.set_default_device`.
+    """
+    return _default_device
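The device helpers above are typically used together at program start. Below is a minimal usage sketch, assuming this module is importable as megengine.device per the file path in this diff; the concrete device names are illustrative:

    from megengine.device import (
        get_default_device,
        is_cuda_available,
        set_default_device,
    )

    # Prefer an explicit GPU when CUDA is present; otherwise pin to CPU.
    if is_cuda_available():
        set_default_device("gpu0")
    else:
        set_default_device("cpu0")
    print(get_default_device())  # e.g. "gpu0"

Note that _valid_device only accepts 4-character names such as "gpu0" or "cpux", so a name like "gpu10" would be rejected by this validation.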
diff --git a/imperative/python/megengine/distributed/__init__.py b/imperative/python/megengine/distributed/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..30e0766f3dd17b959894148f334e8a9bba7a0a68
--- /dev/null
+++ b/imperative/python/megengine/distributed/__init__.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from .group import (
+    WORLD,
+    get_backend,
+    get_client,
+    get_mm_server_addr,
+    get_py_server_addr,
+    get_rank,
+    get_world_size,
+    group_barrier,
+    init_process_group,
+    is_distributed,
+    new_group,
+)
+from .helper import synchronized
+from .launcher import launcher
+from .server import Client, Server
+from .util import get_free_ports
diff --git a/imperative/python/megengine/distributed/group.py b/imperative/python/megengine/distributed/group.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e60a4d51125999a256a2c5dd21f357c4632eed0
--- /dev/null
+++ b/imperative/python/megengine/distributed/group.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from typing import List, Optional, Tuple + +from ..device import set_default_device +from .server import Client, Server + + +class StaticData: + server = None + client = None + master_ip = None + py_server_port = None + mm_server_port = None + world_size = None + proc_rank = None + device = None + backend = None + next_stream = None + + +_sd = None + + +class Group: + def __init__(self, proc_ranks): + if len(proc_ranks) == 0: # empty group + self.proc_ranks = None + self.stream = None + else: + self.reset(proc_ranks) + + def reset(self, proc_ranks): + self.check(proc_ranks) + self.proc_ranks = proc_ranks + self.stream = _sd.next_stream + _sd.next_stream += 1 + + def check(self, proc_ranks): + assert _sd is not None, "please call init_process_group first" + for rank in proc_ranks: + assert isinstance(rank, int) + assert rank >= 0 and rank < _sd.world_size + assert _sd.proc_rank in proc_ranks + + @property + def size(self): + assert len(self.proc_ranks) > 0, "invalid group" + return len(self.proc_ranks) + + @property + def key(self): + assert len(self.proc_ranks) > 0, "invalid group" + return ",".join(map(str, self.proc_ranks)) + + @property + def rank(self): + assert len(self.proc_ranks) > 0, "invalid group" + return self.proc_ranks.index(_sd.proc_rank) + + @property + def comp_node(self): + assert len(self.proc_ranks) > 0, "invalid group" + return "gpu{}:{}".format(_sd.device, self.stream) + + +WORLD = Group([]) + + +def init_process_group( + master_ip: str, + port: int, + world_size: int, + rank: int, + device: int, + backend: Optional[str] = "nccl", +) -> None: + """Initialize the distributed process group and specify the device used in the current process + + :param master_ip: IP address of the master node + :param port: Port available for all processes to communicate + :param world_size: Total number of processes participating in the job + :param rank: Rank of the current process + :param device: The GPU device id to bind this process to + :param backend: Communicator backend, currently support 'nccl' and 'ucx' + """ + if not isinstance(master_ip, str): + raise TypeError("Expect type str but got {}".format(type(master_ip))) + if not isinstance(port, int): + raise TypeError("Expect type int but got {}".format(type(port))) + if not isinstance(world_size, int): + raise TypeError("Expect type int but got {}".format(type(world_size))) + if not isinstance(rank, int): + raise TypeError("Expect type int but got {}".format(type(rank))) + if not isinstance(device, int): + raise TypeError("Expect type int but got {}".format(type(backend))) + if not isinstance(backend, str): + raise TypeError("Expect type str but got {}".format(type(backend))) + + global _sd + assert _sd is None, "init_process_group should be called only once" + _sd = StaticData() + + assert world_size > 1 + assert rank >= 0 and rank < world_size + assert port > 0 + + _sd.client = Client(master_ip, port) + _sd.master_ip = master_ip + _sd.py_server_port = port + _sd.mm_server_port = _sd.client.get_mm_server_port() + _sd.world_size = world_size + _sd.proc_rank = rank + _sd.device = device + _sd.backend = backend + _sd.next_stream = 1 + + WORLD.reset(list(range(world_size))) + + set_default_device("gpu{}".format(device)) + + +def is_distributed() -> bool: + """Return True if the distributed process group has been initialized""" + return _sd is not None + + +def get_rank() -> int: + """Get the rank of the current process""" + return _sd.proc_rank if _sd is not None else 0 + + +def get_world_size() -> int: + """Get the total number of 
processes participating in the job""" + return _sd.world_size if _sd is not None else 1 + + +def get_backend() -> str: + """Get the backend str""" + assert _sd is not None, "please call init_process_group first" + return _sd.backend if _sd is not None else None + + +def get_py_server_addr() -> Tuple[str, int]: + """Get master_ip and port of python XML RPC server""" + assert _sd is not None, "please call init_process_group first" + return _sd.master_ip, _sd.py_server_port + + +def get_mm_server_addr() -> Tuple[str, int]: + """Get master_ip and port of C++ mm_server""" + assert _sd is not None, "please call init_process_group first" + return _sd.master_ip, _sd.mm_server_port + + +def get_client() -> Client: + """Get client of python XML RPC server""" + assert _sd is not None, "please call init_process_group first" + return _sd.client + + +def new_group(proc_ranks: List[int]) -> Group: + """Build a subgroup containing certain ranks""" + return Group(proc_ranks) + + +def group_barrier(group: Optional[Group] = WORLD) -> None: + """Block until all ranks in the group reach this barrier""" + assert isinstance(group, Group) + _sd.client.group_barrier(group.key, group.size) diff --git a/imperative/python/megengine/distributed/helper.py b/imperative/python/megengine/distributed/helper.py new file mode 100644 index 0000000000000000000000000000000000000000..f56cddc0272c1623d00ca837caa36e877ce38c3b --- /dev/null +++ b/imperative/python/megengine/distributed/helper.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import functools +from typing import Callable + +from .group import group_barrier, is_distributed + + +def synchronized(func: Callable): + """Decorator. Decorated function will synchronize when finished. + Specifically, we use this to prevent data race during hub.load""" + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not is_distributed(): + return func(*args, **kwargs) + + ret = func(*args, **kwargs) + group_barrier() + return ret + + return wrapper diff --git a/imperative/python/megengine/distributed/launcher.py b/imperative/python/megengine/distributed/launcher.py new file mode 100644 index 0000000000000000000000000000000000000000..152180abb47e212c9d89b77a94610c62a0623ee3 --- /dev/null +++ b/imperative/python/megengine/distributed/launcher.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
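The synchronized decorator above is a no-op outside distributed mode and appends a group_barrier() after the wrapped call otherwise. A minimal sketch of the intended use; the function name and body here are hypothetical:

    from megengine.distributed import synchronized

    @synchronized
    def write_shared_cache(path, payload):
        # Every rank may call this; the trailing barrier guarantees that no
        # rank returns before all ranks have finished, so readers never
        # observe a half-written cache file.
        with open(path, "wb") as f:
            f.write(payload)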
+import multiprocessing as mp + +from ..device import get_device_count +from .group import init_process_group +from .server import Server +from .util import get_free_ports + + +def _get_device_count(): + """use subprocess to avoid cuda environment initialization in the main process""" + + def run(q): + count = get_device_count("gpu") + q.put(count) + + q = mp.Queue() + p = mp.Process(target=run, args=(q,)) + p.start() + p.join() + return q.get() + + +def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs): + """init distributed process group and run wrapped function""" + init_process_group( + master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=dev + ) + func(*args, **kwargs) + + +def launcher(n_gpus): + """decorator for launching multiple processes in single-machine multi-gpu training""" + + count = _get_device_count() + assert isinstance(n_gpus, int) and n_gpus > 1, "invalid n_gpus" + assert n_gpus <= count, "{} gpus required, {} gpus provided".format(n_gpus, count) + + def decorator(func): + def wrapper(*args, **kwargs): + master_ip = "localhost" + port = get_free_ports(1)[0] + server = Server(port) + + procs = [] + for rank in range(n_gpus): + p = mp.Process( + target=_run_wrapped, + args=(func, master_ip, port, n_gpus, rank, rank, args, kwargs), + ) + p.start() + procs.append(p) + + for rank in range(n_gpus): + procs[rank].join() + code = procs[rank].exitcode + assert code == 0, "subprocess {} exit with code {}".format(rank, code) + + return wrapper + + return decorator diff --git a/imperative/python/megengine/distributed/server.py b/imperative/python/megengine/distributed/server.py new file mode 100644 index 0000000000000000000000000000000000000000..d3d811209a692f927e29aea47ce874fa61cf918c --- /dev/null +++ b/imperative/python/megengine/distributed/server.py @@ -0,0 +1,170 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
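Before moving on to the server implementation, a usage sketch for the launcher decorator defined above (assuming at least 2 GPUs are visible; the training body is hypothetical):

    import megengine.distributed as dist

    @dist.launcher(2)
    def main():
        # Each forked worker arrives here with the process group already
        # initialized and bound to its own GPU (rank doubles as device id).
        print("rank", dist.get_rank(), "of", dist.get_world_size())

    main()  # forks 2 workers, joins them, and asserts each exits with code 0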
+import multiprocessing as mp +import threading +import time +from collections import defaultdict +from functools import partial +from socketserver import ThreadingMixIn +from xmlrpc.client import ServerProxy +from xmlrpc.server import SimpleXMLRPCServer + +from ..core._imperative_rt.utils import create_mm_server +from .util import get_free_ports + + +class Future: + def __init__(self, ack=True): + self.ready = threading.Event() + self.ack = threading.Event() if ack else None + + def set(self, value): + self.value = value + self.ready.set() + if self.ack: + self.ack.wait() + + def get(self): + self.ready.wait() + if self.ack: + self.ack.set() + return self.value + + +class Methods: + def __init__(self, mm_server_port): + self.lock = threading.Lock() + self.mm_server_port = mm_server_port + self.dict_is_grad = defaultdict(partial(Future, True)) + self.dict_remote_tracer = defaultdict(partial(Future, True)) + self.dict_pack_list = defaultdict(partial(Future, False)) + self.dict_barrier_counter = defaultdict(int) + self.dict_barrier_event = defaultdict(threading.Event) + + def connect(self): + return True + + def get_mm_server_port(self): + return self.mm_server_port + + def set_is_grad(self, rank_peer, is_grad): + with self.lock: + future = self.dict_is_grad[rank_peer] + future.set(is_grad) + return True + + def check_is_grad(self, rank_peer): + with self.lock: + future = self.dict_is_grad[rank_peer] + ret = future.get() + with self.lock: + del self.dict_is_grad[rank_peer] + return ret + + def set_remote_tracer(self, rank_peer, tracer_set): + with self.lock: + future = self.dict_remote_tracer[rank_peer] + future.set(tracer_set) + return True + + def check_remote_tracer(self, rank_peer): + with self.lock: + future = self.dict_remote_tracer[rank_peer] + ret = future.get() + with self.lock: + del self.dict_remote_tracer[rank_peer] + return ret + + def set_pack_list(self, key, pack_list): + with self.lock: + future = self.dict_pack_list[key] + future.set(pack_list) + return True + + def get_pack_list(self, key): + with self.lock: + future = self.dict_pack_list[key] + return future.get() + + def group_barrier(self, key, size): + with self.lock: + self.dict_barrier_counter[key] += 1 + counter = self.dict_barrier_counter[key] + event = self.dict_barrier_event[key] + if counter == size: + del self.dict_barrier_counter[key] + del self.dict_barrier_event[key] + event.set() + else: + event.wait() + return True + + +class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer): + pass + + +def start_server(py_server_port, mm_server_port): + server = ThreadXMLRPCServer(("0.0.0.0", py_server_port), logRequests=False) + server.register_instance(Methods(mm_server_port)) + server.serve_forever() + + +class Server: + def __init__(self, port): + self.py_server_port = get_free_ports(1)[0] if port == 0 else port + self.mm_server_port = create_mm_server("0.0.0.0", 0) + self.proc = mp.Process( + target=start_server, + args=(self.py_server_port, self.mm_server_port), + daemon=True, + ) + self.proc.start() + + +class Client: + def __init__(self, master_ip, port): + self.master_ip = master_ip + self.port = port + self.connect() + + def connect(self): + while True: + try: + self.proxy = ServerProxy( + "http://{}:{}".format(self.master_ip, self.port) + ) + if self.proxy.connect(): + break + except: + time.sleep(1) + + def get_mm_server_port(self): + return self.proxy.get_mm_server_port() + + def set_is_grad(self, rank_peer, is_grad): + self.proxy.set_is_grad(rank_peer, is_grad) + + def check_is_grad(self, rank_peer): + 
return self.proxy.check_is_grad(rank_peer) + + def set_remote_tracer(self, rank_peer, tracer_set): + self.proxy.set_remote_tracer(rank_peer, tracer_set) + + def check_remote_tracer(self, rank_peer): + return self.proxy.check_remote_tracer(rank_peer) + + def set_pack_list(self, key, pack_list): + self.proxy.set_pack_list(key, pack_list) + + def get_pack_list(self, key): + return self.proxy.get_pack_list(key) + + def group_barrier(self, key, size): + self.proxy.group_barrier(key, size) diff --git a/imperative/python/megengine/distributed/util.py b/imperative/python/megengine/distributed/util.py new file mode 100644 index 0000000000000000000000000000000000000000..b3a0a2aa18a2bfde09d7cd48d223e85867b60369 --- /dev/null +++ b/imperative/python/megengine/distributed/util.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import functools +import socket +from typing import List + + +def get_free_ports(num: int) -> List[int]: + """Get one or more free ports. + """ + socks, ports = [], [] + for i in range(num): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind(("", 0)) + socks.append(sock) + ports.append(sock.getsockname()[1]) + for sock in socks: + sock.close() + return ports diff --git a/imperative/python/megengine/functional/__init__.py b/imperative/python/megengine/functional/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..14fef9b30fa74bd7f2b6043909f1d25fadda28be --- /dev/null +++ b/imperative/python/megengine/functional/__init__.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# pylint: disable=redefined-builtin +from . import distributed +from .elemwise import * +from .graph import add_update +from .loss import ( + binary_cross_entropy, + cross_entropy, + cross_entropy_with_softmax, + hinge_loss, + l1_loss, + nll_loss, + smooth_l1_loss, + square_loss, + triplet_margin_loss, +) +from .math import * +from .nn import * +from .quantized import conv_bias_activation +from .tensor import * +from .utils import accuracy, zero_grad + +# delete namespace +# pylint: disable=undefined-variable +# del elemwise, graph, loss, math, nn, tensor # type: ignore[name-defined] diff --git a/imperative/python/megengine/functional/debug_param.py b/imperative/python/megengine/functional/debug_param.py new file mode 100644 index 0000000000000000000000000000000000000000..b27f4b4b205acc2ebbc4c947c5be3f4a955be048 --- /dev/null +++ b/imperative/python/megengine/functional/debug_param.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import os
+
+_conv_execution_strategy = os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY", "HEURISTIC")
+
+
+def get_conv_execution_strategy() -> str:
+    """Returns the execution strategy of :class:`~.Conv2d`.
+
+    See :func:`~.set_conv_execution_strategy` for possible return values.
+    """
+    return _conv_execution_strategy
+
+
+def set_conv_execution_strategy(option: str):
+    """Sets the execution strategy of :class:`~.Conv2d`.
+
+    :param option: Decides how :class:`~.Conv2d` algorithm is chosen.
+        Available values:
+
+        * 'HEURISTIC' uses heuristic to choose the fastest algorithm.
+        * 'PROFILE' runs possible algorithms on real device to find the best.
+        * 'PROFILE_HEURISTIC' uses profile result and heuristic to choose the fastest algorithm.
+        * 'PROFILE_REPRODUCIBLE' uses the fastest of profile result that is also reproducible.
+        * 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible.
+
+        The default strategy is 'HEURISTIC'.
+
+        It can also be set through the environment variable 'MEGENGINE_CONV_EXECUTION_STRATEGY'.
+    """
+    valid_option = (
+        "HEURISTIC",
+        "PROFILE",
+        "PROFILE_HEURISTIC",
+        "PROFILE_REPRODUCIBLE",
+        "HEURISTIC_REPRODUCIBLE",
+    )
+    if option not in valid_option:
+        raise ValueError("Valid option can only be one of {}".format(valid_option))
+
+    global _conv_execution_strategy  # pylint: disable=global-statement
+    _conv_execution_strategy = option
diff --git a/imperative/python/megengine/functional/distributed.py b/imperative/python/megengine/functional/distributed.py
new file mode 100644
index 0000000000000000000000000000000000000000..92e93f84601a833e864fb34c6d3ddf24c7dc5434
--- /dev/null
+++ b/imperative/python/megengine/functional/distributed.py
@@ -0,0 +1,299 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
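A short sketch of toggling the strategy setters above around a profiling run; the module path follows this diff, and it is an assumption here that subsequent convolutions pick up the setting lazily:

    from megengine.functional.debug_param import (
        get_conv_execution_strategy,
        set_conv_execution_strategy,
    )

    set_conv_execution_strategy("PROFILE")  # benchmark algorithms on the real device
    assert get_conv_execution_strategy() == "PROFILE"
    # ... build and run the network; Conv2d chooses algorithms per this strategy ...
    set_conv_execution_strategy("HEURISTIC")  # restore the default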
+from typing import Optional, Tuple + +from ..core._imperative_rt.ops import CollectiveCommDefModeEnum +from ..core.autodiff.builtin_op_utils import builtin_op_get_backward_fn +from ..core.autodiff.grad import ( + Tracer, + check_backward_allow_noinput, + get_grad_managers, + get_op_has_grad_fn, + tracer_apply, +) +from ..core.ops.builtin import CollectiveComm, Copy, RemoteRecv, RemoteSend +from ..core.tensor.core import apply +from ..core.tensor.tensor import Tensor, tensor_apply +from ..distributed.group import ( + WORLD, + Group, + get_backend, + get_client, + get_mm_server_addr, + get_rank, +) +from ..tensor import tensor + +__all__ = [ + "reduce_sum", + "broadcast", + "all_gather", + "reduce_scatter_sum", + "all_reduce_sum", + "all_reduce_max", + "all_reduce_min", + "gather", + "scatter", + "all_to_all", + "remote_send", + "remote_recv", +] + + +@apply.add +def _(op: RemoteSend, *args: Tensor): + ret = tensor_apply(op, *args) + + # set extra information + tracer_set = dict() + for k in set().union(*(i._extra_data for i in args if isinstance(i, Tensor))): + tracer_set[k.name] = True + + # check tracer_set in remote_recv + get_client().set_remote_tracer(op.key, tracer_set) + return ret + + +@builtin_op_get_backward_fn.register(RemoteSend) +def _(op: RemoteSend, inputs, outputs, input_requires_grad): + def backward(*args): + return [ + remote_recv( + op.rank_to, inputs[0].shape, inputs[0].dtype, str(inputs[0].device) + ) + ] + + return backward, [True] + + +@get_op_has_grad_fn.register(RemoteSend) +def _(op: RemoteSend): + def has_grad(opnode, reached): + return get_client().check_is_grad(op.key) + + return has_grad + + +@check_backward_allow_noinput.register(RemoteSend) +def _(op: RemoteSend): + return True + + +@builtin_op_get_backward_fn.register(RemoteRecv) +def _(op: RemoteRecv, inputs, outputs, input_requires_grad): + def backward(*output_grads): + return [remote_send(output_grads[0], op.rank_from)] + + return backward, [True] + + +@get_op_has_grad_fn.register(RemoteRecv) +def _(op: RemoteRecv): + def has_grad(opnode, reached): + ret = False + for v in opnode.outputs: + if v() in reached: + ret = True + break + get_client().set_is_grad(op.key, ret) + return ret + + return has_grad + + +def collective_comm(inp, mode, group, device): + """Helper function for applying collective communication functions""" + assert isinstance(group, Group) + if group is None: + return inp + op = CollectiveComm() + op.key = group.key + op.nr_devices = group.size + op.rank = group.rank + op.is_root = op.rank == 0 + op.local_grad = False + op.addr, op.port = get_mm_server_addr() + op.mode = mode + op.dtype = inp.dtype + op.backend = get_backend() + op.comp_node = device + return apply(op, inp)[0] + + +def reduce_sum( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create reduce_sum operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.REDUCE_SUM + return collective_comm(inp, mode, group, device) + + +def broadcast( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create broadcast operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.BROADCAST + return collective_comm(inp, mode, group, device) + + +def all_gather( + inp: Tensor, group: Optional[Group] = WORLD, device: 
Optional[str] = "" +) -> Tensor: + """Create all_gather operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.ALL_GATHER + return collective_comm(inp, mode, group, device) + + +def reduce_scatter_sum( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create reduce_scatter_sum operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.REDUCE_SCATTER_SUM + return collective_comm(inp, mode, group, device) + + +def all_reduce_sum( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create all_reduce_sum operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.ALL_REDUCE_SUM + return collective_comm(inp, mode, group, device) + + +def all_reduce_max( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create all_reduce_max operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.ALL_REDUCE_MAX + return collective_comm(inp, mode, group, device) + + +def all_reduce_min( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create all_reduce_min operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.ALL_REDUCE_MIN + return collective_comm(inp, mode, group, device) + + +def gather( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create gather operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.GATHER + return collective_comm(inp, mode, group, device) + + +def scatter( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create scatter operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.SCATTER + return collective_comm(inp, mode, group, device) + + +def all_to_all( + inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" +) -> Tensor: + """Create all_to_all operator for collective communication + + :param inp: input tensor + :param group: communication group + :param device: execute placement + """ + mode = CollectiveCommDefModeEnum.ALL_TO_ALL + return collective_comm(inp, mode, group, device) + + +def remote_send(inp: Tensor, dest_rank: int) -> Tensor: + """Send a Tensor to a remote process + + :param inp: tensor to send + :param dest_rank: destination process rank + """ + op = RemoteSend() + op.key = "{}->{}".format(get_rank(), dest_rank) + op.addr, op.port = get_mm_server_addr() + op.rank_to = dest_rank + return apply(op, inp)[0] + + +def remote_recv( + src_rank: int, shape: Tuple[int], dtype: type, cn: Optional[str] = "gpu0" +) -> Tensor: + """Receive a Tensor from a remote process + + :param src_rank: source process rank + :param shape: the shape of the tensor to receive + :param 
dtype: the data type of the tensor to receive
+    :param cn: the comp node to place the received tensor
+    """
+    key = "{}->{}".format(src_rank, get_rank())
+
+    # dummy input
+    inp = tensor([0])
+    tracer_set = get_client().check_remote_tracer(key)
+    for grad_manager in get_grad_managers():
+        if grad_manager.name in tracer_set:
+            grad_manager.wrt(inp)
+
+    op = RemoteRecv()
+    op.key = key
+    op.cn = cn
+    op.shape = shape
+    op.dtype = dtype
+    op.addr, op.port = get_mm_server_addr()
+    op.rank_from = src_rank
+
+    return apply(op, inp)[0]
diff --git a/imperative/python/megengine/functional/elemwise.py b/imperative/python/megengine/functional/elemwise.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b8ac1f08f1230151d07be35bc1b14e270964386
--- /dev/null
+++ b/imperative/python/megengine/functional/elemwise.py
@@ -0,0 +1,481 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# pylint: disable=unused-argument,invalid-name,redefined-builtin,arguments-out-of-order
+import functools
+
+from ..core.ops import builtin
+from ..core.tensor import utils
+from ..core.tensor.core import apply
+from ..tensor import Tensor
+
+__all__ = [
+    "abs",
+    "add",
+    "acos",
+    "asin",
+    "atan",
+    "atan2",
+    "asinh",
+    "acosh",
+    "atanh",
+    "bitwise_and",  # TODO
+    "bitwise_not",  # TODO
+    "bitwise_or",  # TODO
+    "bitwise_xor",  # TODO
+    "ceil",
+    "clamp",
+    "cos",
+    "cosh",
+    "div",
+    "eq",
+    "exp",
+    "expm1",
+    "floor",
+    "floor_div",
+    "gt",
+    "ge",
+    "hswish",
+    "hsigmoid",
+    "left_shift",
+    "lt",
+    "le",
+    "log",
+    "log1p",
+    "logical_and",
+    "logical_not",
+    "logical_or",
+    "logical_xor",
+    "maximum",
+    "minimum",
+    "mod",
+    "mul",
+    "neg",
+    "ne",
+    "pow",
+    "relu",
+    "relu6",
+    "right_shift",
+    "round",
+    "sigmoid",
+    "sin",
+    "sinh",
+    "sqrt",
+    "square",
+    "sub",
+    "tan",
+    "tanh",
+    "fast_tanh",
+]
+
+
+def _elwise(*args, mode):
+    op = builtin.Elemwise(mode=mode)
+    args = utils.convert_inputs(*args)
+    (result,) = apply(op, *args)
+    return result
+
+
+def _logical(*args, mode):
+    op = builtin.CondExecPredLogical(mode=mode)
+    args = utils.convert_inputs(*args)
+    (result,) = apply(op, *args)
+    return result
+
+
+def _elemwise_multi_type(*args, mode, **kwargs):
+    op = builtin.ElemwiseMultiType(mode=mode, **kwargs)
+    args = utils.convert_inputs(*args)
+    (result,) = apply(op, *args)
+    return result
+
+
+# math operations
+
+
+def add(x, y):
+    """Element-wise addition.
+    At least one operand should be tensor.
+    The same applies to sub/mul/div/floor_div/pow/mod/atan2/eq/ne/lt/le/gt/ge/maximum/minimum.
+ """ + return _elwise(x, y, mode="add") + + +def sub(x, y): + """Element-wise subtract.""" + return _elwise(x, y, mode="sub") + + +def mul(x, y): + """Element-wise multiplication.""" + return _elwise(x, y, mode="mul") + + +def div(x, y): + """Element-wise (x / y).""" + return _elwise(x, y, mode="true_div") + + +def floor_div(x, y): + """Element-wise floor(x / y).""" + return _elwise(x, y, mode="floor_divide") + + +def neg(x): + """Element-wise negation.""" + return _elwise(x, mode="negate") + + +def pow(x, y): + """Element-wise power.""" + return _elwise(x, y, mode="pow") + + +def mod(x, y): + """Element-wise remainder of division.""" + return _elwise(x, y, mode="mod") + + +def abs(x): + """Element-wise absolute value.""" + return _elwise(x, mode="abs") + + +def exp(x): + """Element-wise exponential.""" + return _elwise(x, mode="exp") + + +def expm1(x): + """Element-wise exp(x)-1.""" + return _elwise(x, mode="expm1") + + +def log(x): + """Element-wise logarithm (base `e`).""" + return _elwise(x, mode="log") + + +def log1p(x): + """Element-wise log(x+1) (base `e`).""" + return _elwise(x, mode="log1p") + + +def sqrt(inp: Tensor) -> Tensor: + """ + Return a new tensor with the square-root of the elements of ``inp``. + For negative value, return nan. + + :param inp: The input tensor + :return: The computed tensor + + Examples: + + .. testcode:: + + import numpy as np + import megengine as mge + import megengine.functional as F + + data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) + out = F.sqrt(data) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[0. 1. 1.4142] + [1.7321 2. 2.2361 ]] + + """ + return inp ** 0.5 + + +def square(inp: Tensor) -> Tensor: + """ + Return a new tensor with the square of the elements of ``inp`` + + :param inp: The input tensor + :return: The computed tensor + + Examples: + + .. testcode:: + + import numpy as np + import megengine as mge + import megengine.functional as F + + data = mge.tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) + out = F.square(data) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[0. 1. 4.] + [9. 16. 
25.]]
+
+    """
+    return inp ** 2
+
+
+def round(x):
+    """Round tensor to int element-wise."""
+    return _elwise(x, mode="round")
+
+
+def ceil(x):
+    """Return the ceil of the input, element-wise."""
+    return _elwise(x, mode="ceil")
+
+
+def floor(x):
+    """Calculate the floor element-wise"""
+    return _elwise(x, mode="floor")
+
+
+# trigonometric functions
+
+
+def cos(x):
+    """Cosine, element-wise."""
+    return _elwise(x, mode="cos")
+
+
+def sin(x):
+    """Sine, element-wise."""
+    return _elwise(x, mode="sin")
+
+
+def tan(x):
+    """Tangent, element-wise."""
+    return sin(x) / cos(x)
+
+
+def acos(x):
+    """Inverse cosine, element-wise."""
+    return _elwise(x, mode="acos")
+
+
+def asin(x):
+    """Inverse sine, element-wise."""
+    return _elwise(x, mode="asin")
+
+
+def atan(x):
+    """Inverse tangent, element-wise."""
+    return _elwise(x, 1, mode="atan2")
+
+
+def atan2(y, x):
+    """Element-wise arc tangent of ``y / x``, choosing the quadrant correctly."""
+    return _elwise(y, x, mode="atan2")
+
+
+def cosh(x):
+    r"""Compute element-wise hyperbolic cosine."""
+    return 0.5 * (exp(x) + exp(-x))
+
+
+def sinh(x):
+    r"""Compute element-wise hyperbolic sine."""
+    u = expm1(x)
+    return 0.5 * u / (u + 1) * (u + 2)
+
+
+def tanh(x):
+    r"""Compute element-wise hyperbolic tangent."""
+    return _elwise(x, mode="tanh")
+
+
+def asinh(x):
+    r"""Compute element-wise inverse hyperbolic sine."""
+    return log(x + (x ** 2 + 1) ** 0.5)
+
+
+def acosh(x):
+    r"""Compute element-wise inverse hyperbolic cosine."""
+    return log(x + (x ** 2 - 1) ** 0.5)
+
+
+def atanh(x):
+    r"""Compute element-wise inverse hyperbolic tangent."""
+    return log1p(2 * x / (1 - x)) / 2
+
+
+def fast_tanh(x):
+    r"""Compute element-wise fast tanh; this is an approximation:
+
+    .. math::
+        \text{fast_tanh}(x) = x * (27. + x * x) / (27. + 9. * x * x)
+    """
+    return _elwise(x, mode="fast_tanh")
+
+
+# bit-twiddling functions
+
+
+def left_shift(x, y):
+    """Element-wise bitwise left shift."""
+    return _elwise(x, y, mode="shl")
+
+
+def right_shift(x, y):
+    """Element-wise bitwise right shift."""
+    return _elwise(x, y, mode="shr")
+
+
+def bitwise_and(x, y):
+    raise NotImplementedError
+
+
+def bitwise_not(x):
+    raise NotImplementedError
+
+
+def bitwise_or(x, y):
+    raise NotImplementedError
+
+
+def bitwise_xor(x, y):
+    raise NotImplementedError
+
+
+# logical functions
+
+
+def logical_and(x, y):
+    return _elwise(x, y, mode="AND")
+
+
+def logical_not(x):
+    return _elwise(x, mode="NOT")
+
+
+def logical_or(x, y):
+    return _elwise(x, y, mode="OR")
+
+
+def logical_xor(x, y):
+    return _elwise(x, y, mode="XOR")
+
+
+# comparison functions
+
+
+def eq(x, y):
+    """Return (x == y) element-wise."""
+    return _elwise(x, y, mode="eq")
+
+
+def ne(x, y):
+    """Return (x != y) element-wise."""
+    return x != y
+
+
+def lt(x, y):
+    """Return (x < y) element-wise."""
+    return _elwise(x, y, mode="lt")
+
+
+def le(x, y):
+    """Return (x <= y) element-wise."""
+    return _elwise(x, y, mode="leq")
+
+
+def gt(x, y):
+    """Return (x > y) element-wise."""
+    return _elwise(y, x, mode="lt")
+
+
+def ge(x, y):
+    """Return (x >= y) element-wise"""
+    return _elwise(y, x, mode="leq")
+
+
+def hswish(x):
+    """Return x * relu6(x + 3) / 6 element-wise"""
+    return _elwise(x, mode="h_swish")
+
+
+def hsigmoid(x):
+    """Return relu6(x + 3) / 6 element-wise"""
+    return relu6(x + 3) / 6
+
+
+def relu(x):
+    """Return `max(x, 0)` element-wise."""
+    return _elwise(x, mode="relu")
+
+
+def relu6(x):
+    """Return min(max(x, 0), 6) element-wise."""
+    return minimum(maximum(x, 0), 6)
+
+
+def sigmoid(x):
+    """Return 1 / ( 1 + exp( -x ) ) element-wise."""
+    return _elwise(x, mode="sigmoid")
+
+
+def maximum(x, y):
+    """Element-wise maximum of array elements."""
+    return _elwise(x, y, mode="max")
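Several of the helpers above are defined in terms of each other: gt/ge simply swap the operands of the lt/leq modes, and hsigmoid/relu6 are composed from minimum/maximum. A tiny numeric check in the style of the .. testcode:: snippets used elsewhere in this file (output values verified by hand):

    import numpy as np
    import megengine as mge
    import megengine.functional as F

    x = mge.tensor(np.array([-4.0, -1.0, 0.0, 2.0, 7.0], dtype=np.float32))
    print(F.relu6(x).numpy())       # [0. 0. 0. 2. 6.]
    print(F.hsigmoid(x).numpy())    # relu6(x + 3) / 6 -> [0. 0.3333 0.5 0.8333 1.]
    print(F.maximum(x, 0).numpy())  # identical to F.relu(x) for float inputs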
+    """
+    return _elwise(x, y, mode="min")
+
+
+def clamp(inp: Tensor, lower=None, upper=None) -> Tensor:
+    r"""
+    Clamp all elements in :attr:`inp` into the range `[` :attr:`lower`, :attr:`upper` `]` and return
+    a resulting tensor:
+
+    .. math::
+        y_i = \begin{cases}
+            \text{lower} & \text{if } x_i < \text{lower} \\
+            x_i & \text{if } \text{lower} \leq x_i \leq \text{upper} \\
+            \text{upper} & \text{if } x_i > \text{upper}
+        \end{cases}
+
+    :param inp: the input tensor.
+    :param lower: lower-bound of the range to be clamped to
+    :param upper: upper-bound of the range to be clamped to
+
+    Example:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+        a = tensor(np.arange(5).astype(np.int32))
+
+        print(F.clamp(a, 2, 4).numpy())
+
+        print(F.clamp(a, lower=3).numpy())
+
+        print(F.clamp(a, upper=3).numpy())
+
+    .. testoutput::
+
+        [2 2 2 3 4]
+        [3 3 3 3 4]
+        [0 1 2 3 3]
+
+    """
+    assert (
+        lower is not None or upper is not None
+    ), "At least one of 'lower' or 'upper' must not be None"
+    if lower is not None:
+        if upper is not None:
+            assert lower <= upper, "clamp lower bound is bigger than upper bound"
+            return minimum(maximum(inp, lower), upper)
+        else:
+            return maximum(inp, lower)
+    else:
+        return minimum(inp, upper)
diff --git a/imperative/python/megengine/functional/external.py b/imperative/python/megengine/functional/external.py
new file mode 100644
index 0000000000000000000000000000000000000000..6411be0a0602a91b6926425d7e61dfbf04b96111
--- /dev/null
+++ b/imperative/python/megengine/functional/external.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# pylint: disable=too-many-lines
+from typing import List
+
+from ..core import Tensor
+
+
+def cambricon_subgraph(
+    inputs: List[Tensor], data: bytes, symbol: str, tensor_dim_mutable: bool,
+) -> List[Tensor]:
+    """Load a serialized Cambricon subgraph (i.e. cnrtModel_t) and
+    execute the operations defined in the subgraph.
+
+    :param inputs: List of input tensors of the subgraph.
+    :param data: The serialized subgraph.
+    :param symbol: The name of the function in the subgraph.
+        The function corresponds to a cnmlFusionOp
+        which is added to the cnmlModel_t/cnrtModel_t.
+    :param tensor_dim_mutable: Whether the input tensors' shapes are mutable
+        in cnrtModel_t.
+    """
+    raise NotImplementedError
+
+
+def extern_opr_subgraph(
+    inputs, output_shapes: List[tuple], dump_name: str, dump_data: bytes,
+) -> List[Tensor]:
+    """Load a serialized extern opr subgraph and fake-execute the operator.
+
+    :param inputs: Tensor or list of input tensors.
+    :param output_shapes: The output shapes.
+    :param dump_name: The serialized subgraph name.
+    :param dump_data: The serialized subgraph.
+
+    :return: List of tensors.
+    """
+    raise NotImplementedError
diff --git a/imperative/python/megengine/functional/graph.py b/imperative/python/megengine/functional/graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..54009172de1b763fa5b4502d53f3a569e614ecc7
--- /dev/null
+++ b/imperative/python/megengine/functional/graph.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections
+from typing import Iterable, Optional, Union
+
+from ..core.tensor import Tensor
+
+
+def add_update(
+    dest: Tensor,
+    delta: Tensor,
+    *,
+    alpha: Union[Tensor, float, int] = 1.0,
+    beta: Union[Tensor, float, int] = 1.0,
+    bias: Union[Tensor, float, int] = 0.0
+):
+    r"""Inplace modify ``dest`` as follows:
+
+    .. math::
+        dest = alpha * dest + beta * delta + bias
+
+    :param dest: input data that will be inplace modified.
+    :param delta: update value that will be added to ``dest``.
+    :param alpha: weight ratio of ``dest``. Default: 1.0
+    :param beta: weight ratio of ``delta``. Default: 1.0
+    :param bias: bias value appended to the result. Default: 0.0
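+
+    Example (an illustrative SGD-style update; assumes this helper is
+    re-exported as ``megengine.functional.add_update``):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        param = tensor(np.ones(3, dtype=np.float32))
+        grad = tensor(np.full(3, 0.5, dtype=np.float32))
+        F.add_update(param, grad, beta=-0.01)  # param <- param - 0.01 * grad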
+    """
+    if beta is not None and beta != 1.0:
+        delta = delta * beta
+    if bias is not None and bias != 0.0:
+        delta = delta + bias
+    if alpha is not None and alpha != 1.0:
+        dest *= alpha
+    dest += delta
+    return dest
diff --git a/imperative/python/megengine/functional/loss.py b/imperative/python/megengine/functional/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..400065d4b2c84958890e9fabb9e3491214008f25
--- /dev/null
+++ b/imperative/python/megengine/functional/loss.py
@@ -0,0 +1,388 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import numpy as np
+
+from ..tensor import Tensor
+from .elemwise import abs, eq, exp, log, maximum, pow, relu
+from .nn import assert_equal, indexing_one_hot
+from .tensor import where
+from .utils import zero_grad
+
+
+def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
+    r"""
+    Calculates the mean absolute error (MAE) between
+    each element in the pred :math:`x` and label :math:`y`.
+
+    The mean absolute error can be described as:
+
+    .. math:: \ell(x, y) = mean\left(L\right)
+
+    where
+
+    .. math::
+
+        L = \{l_1,\dots,l_N\}, \quad
+        l_n = \left| x_n - y_n \right|,
+
+    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
+    of :math:`N` elements each. :math:`N` is the batch size.
+
+    :param pred: The predicted result from model.
+    :param label: The ground truth to compare.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        import megengine as mge
+        import megengine.functional as F
+        ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
+        tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
+        loss = F.l1_loss(ipt, tgt)
+        print(loss.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [2.75]
+
+    """
+    diff = pred - label
+    return abs(diff).mean()
+
+
+def square_loss(pred: Tensor, label: Tensor) -> Tensor:
+    r"""
+    Calculates the mean squared error (squared L2 norm) between
+    each element in the pred :math:`x` and label :math:`y`.
+
+    The mean squared error can be described as:
+
+    .. math:: \ell(x, y) = mean\left(L\right)
+
+    where
+
+    .. math::
+
+        L = \{l_1,\dots,l_N\}, \quad
+        l_n = \left( x_n - y_n \right)^2,
+
+    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
+    of :math:`N` elements each. :math:`N` is the batch size.
+
+    :param pred: The predicted result from model.
+    :param label: The ground truth to compare.
+
+    Shape:
+        - pred: :math:`(N, *)` where :math:`*` means any number of additional
+          dimensions
+        - label: :math:`(N, *)`. Same shape as ``pred``
+
+    """
+    diff = pred - label
+    return (diff ** 2).mean()
+
+
+def cross_entropy(
+    inp: Tensor, target: Tensor, axis: int = 1, ignore_index: int = -1
+) -> Tensor:
+    r"""
+    Returns the cross entropy loss in a classification problem.
+
+    .. math:: \textrm{CrossEntropy}(x, y) = - \sum_{i} y_i\log(x_i)
+
+    :param inp: The input tensor representing the predicted probability.
+    :param target: The input tensor representing the classification label.
+    :param axis: An axis along which cross_entropy will be applied. Default: 1
+    :param ignore_index: Specifies a target value that is ignored and does not contribute to the input gradient. Default: -1
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data_shape = (1, 2)
+        label_shape = (1, )
+
+        pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(data_shape))
+        label = tensor(np.ones(label_shape, dtype=np.int32))
+        loss = F.cross_entropy(pred, label)
+        print(loss.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [0.69]
+
+    """
+    raise NotImplementedError
+    # n0 = inp.ndim
+    # n1 = target.ndim
+    # assert n0 == n1 + 1, (
+    #     "target ndim must be one less than input ndim; input_ndim={} "
+    #     "target_ndim={}".format(n0, n1)
+    # )
+
+    # if ignore_index != -1:
+    #     mask = 1 - equal(target, ignore_index)
+    #     target = target * mask
+    #     loss = -log(indexing_one_hot(inp, target, axis)) * mask
+    #     return loss.sum() / maximum(mask.sum(), 1.0)
+    # else:
+    #     return -log(indexing_one_hot(inp, target, axis)).mean()
+
+
+def cross_entropy_with_softmax(
+    pred: Tensor, label: Tensor, axis: int = 1, label_smooth: float = 0
+) -> Tensor:
+    r"""
+    Returns loss after applying :func:`~.softmax` + :func:`~.cross_entropy`.
+
+    It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`.
+
+    When using label smoothing, the label distribution is as follows:
+
+    .. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K
+
+    where :math:`y^{LS}` and :math:`y` are the new label distribution and the origin label distribution respectively.
+    :math:`k` is the index of the label distribution. :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes.
+
+    :param pred: The input tensor representing the predicted probability.
+    :param label: The input tensor representing the classification label.
+    :param axis: An axis along which softmax will be applied. Default: 1.
+    :param label_smooth: The label smoothing parameter that re-distributes the target distribution. Default: 0.
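+
+    Example (an illustrative sketch; the printed value is approximate):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        pred = tensor(np.array([[0.0, 0.0]], dtype=np.float32))
+        label = tensor(np.array([0], dtype=np.int32))
+        loss = F.cross_entropy_with_softmax(pred, label)
+        print(loss.numpy())  # ~[0.6931], i.e. log(2)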
+ """ + n0 = pred.ndim + n1 = label.ndim + assert n0 == n1 + 1, ( + "target ndim must be one less than input ndim; input_ndim={} " + "target_ndim={}".format(n0, n1) + ) + + num_classes = pred.shape[axis] + + # Denominator of the softmax + offset = pred.max(axis=axis).detach() + pred = pred - offset + down = exp(pred).sum(axis=axis) + + up = pred[np.arange(pred.shape[0]), label] + + if label_smooth != 0: + factor = label_smooth / num_classes + up = up * (1 - label_smooth) + pred.sum(axis=axis) * factor + + return (log(down) - up).mean() + + +def triplet_margin_loss( + anchor: Tensor, positive: Tensor, negative: Tensor, margin: float = 1.0, p: int = 2 +) -> Tensor: + r""" + Creates a criterion that measures the triplet loss given an input tensors. + + .. math:: + + L(a, p, n) = max\left\{d\left(a_{i},p_{i}\right)-d\left(a_{i}, n_{i}\right)+margin, 0\right\},\ + d\left(x_{i},y_{i}\right)=\left\|x_{i}-y_{i}\right\|_{p} + + :param anchor: The input tensor representing the anchor samples. + :param positive: The input tensor representing the positive samples. + :param negative: The input tensor representing the negative samples. + :param margin: Default: 1.0 + :param p: The norm degree for pairwise distance. Default: 2.0 + """ + s0 = anchor.shapeof() + s1 = positive.shapeof() + s2 = negative.shapeof() + assert_equal(s0, s1) + assert_equal(s1, s2) + + n0 = anchor.ndim + n1 = positive.ndim + n2 = negative.ndim + assert n0 == 2 and n1 == 2 and n2 == 2, ( + "anchor ndim, positive ndim, and negative ndim must be 2; " + "anchor_ndim={} positive_ndim={} negative_ndim={}".format(n0, n1, n2) + ) + assert p > 0, "a margin with a value greater than 0; p={}".format(p) + + diff0 = abs(anchor - positive) + diff1 = abs(anchor - negative) + + d1 = power(power(diff0, p).sum(axis=1, keepdims=True), 1 / p) + d2 = power(power(diff1, p).sum(axis=1, keepdims=True), 1 / p) + + loss = maximum(d1 - d2 + margin, 0) + + return loss.mean() + + +def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor: + r"""Function that measures the Binary Cross Entropy between the target and the prediction. + + :param pred: (N,*) where * means, any number of additional dimensions. + :param label: (N,*), same shape as the input. + + """ + assert pred.shape == label.shape + + return -1.0 * (label * log(pred) + (1.0 - label) * log(1 - pred)).mean() + + +def nll_loss( + pred: Tensor, label: Tensor, axis: int = 1, ignore_index: int = -1 +) -> Tensor: + r""" + The negative log likelihood loss. + + :param pred: The predicted result from model. + :param label: The ground truth to compare. + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + data_shape = (2, 2) + label_shape = (2, ) + + data = tensor( + np.array([[1, 0.5], [0.3, 1.2]], dtype=np.float32).reshape(data_shape), + ) + label = tensor( + np.ones(label_shape, dtype=np.int32) + ) + pred = F.log(F.softmax(data)) + loss1 = F.nll_loss(pred, label) + loss2 = F.cross_entropy_with_softmax(data, label) + print(loss1.numpy(), loss2.numpy()) + + Outputs: + + .. 
+
+    """
+    assert pred.shape == label.shape
+
+    return -1.0 * (label * log(pred) + (1.0 - label) * log(1 - pred)).mean()
+
+
+def nll_loss(
+    pred: Tensor, label: Tensor, axis: int = 1, ignore_index: int = -1
+) -> Tensor:
+    r"""
+    The negative log likelihood loss.
+
+    :param pred: The predicted result from model.
+    :param label: The ground truth to compare.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+        data_shape = (2, 2)
+        label_shape = (2, )
+
+        data = tensor(
+            np.array([[1, 0.5], [0.3, 1.2]], dtype=np.float32).reshape(data_shape),
+        )
+        label = tensor(
+            np.ones(label_shape, dtype=np.int32)
+        )
+        pred = F.log(F.softmax(data))
+        loss1 = F.nll_loss(pred, label)
+        loss2 = F.cross_entropy_with_softmax(data, label)
+        print(loss1.numpy(), loss2.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [0.6576154] [0.6576154]
+
+    """
+    raise NotImplementedError
+    # n0 = pred.ndim
+    # n1 = label.ndim
+    # assert n0 == n1 + 1, (
+    #     "target ndim must be one less than input ndim; input_ndim={} "
+    #     "target_ndim={}".format(n0, n1)
+    # )
+
+    # mask = 1.0 - equal(label, ignore_index)
+    # label = label * mask
+
+    # loss = indexing_one_hot(pred, label, axis) * mask
+
+    # return -1.0 * loss.sum() / maximum(mask.sum(), 1.0)
+
+
+def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:
+    r"""
+    Calculate the hinge loss which is often used in SVMs.
+
+    The hinge loss can be described as:
+
+    .. math:: loss(x, y) = \frac{1}{N}\sum_i\sum_j \max(0, 1 - x_{ij} y_{ij})
+
+    :param pred: The input tensor representing the predicted probability, shape is (N, C).
+    :param label: The input tensor representing the binary classification label, shape is (N, C).
+    :param norm: Specify the norm to calculate the loss, should be "L1" or "L2".
+
+    Examples:
+
+    .. testcode::
+
+        from megengine import tensor
+        import megengine.functional as F
+
+        pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32")
+        label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32")
+
+        loss = F.hinge_loss(pred, label)
+
+        print(loss.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [1.5]
+
+    """
+    assert norm in ["L1", "L2"], "norm must be L1 or L2"
+    # labels are expected to be -1/1; the hinge term is max(0, 1 - pred * label)
+    loss = relu(1.0 - pred * label)
+    if norm == "L1":
+        return loss.sum(axis=1).mean()
+    else:
+        return (loss ** 2).sum(axis=1).mean()
+
+
+def smooth_l1_loss(pred: Tensor, label: Tensor) -> Tensor:
+    r"""
+    Calculate the smooth l1 loss proposed in `Fast R-CNN paper by Ross Girshick`.
+
+    The smooth l1 loss can be described as:
+
+    .. math::
+        \text{loss}(x, y) = \frac{1}{n} \sum_{i} l_{i}
+
+    where :math:`l_{i}` is given by:
+
+    .. math::
+        l_{i} =
+        \begin{cases}
+        0.5 (x_i - y_i)^2, & \text{if } |x_i - y_i| < 1 \\
+        |x_i - y_i| - 0.5, & \text{otherwise }
+        \end{cases}
+
+    :param pred: The predicted result from model.
+    :param label: The ground truth to compare.
+
+    Examples:
+
+    .. testcode::
+
+        from megengine import tensor
+        import megengine.functional as F
+
+        pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]])
+        label = tensor([[0.4, 1.5, 1.2], [0., 0.1, 2.2]])
+
+        loss = F.smooth_l1_loss(pred, label)
+
+        print(loss.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [0.5608334]
+    """
+    raise NotImplementedError
+    # diff = abs(pred - label)
+    # l2_loss = 0.5 * (diff ** 2)
+    # l1_loss = diff - 0.5
+    # mask = diff < 1
+    # loss = where(mask, l2_loss, l1_loss)
+    # return loss.mean()
diff --git a/imperative/python/megengine/functional/math.py b/imperative/python/megengine/functional/math.py
new file mode 100644
index 0000000000000000000000000000000000000000..3483ad816dc9e1193d4202e809172e3c3a2b494b
--- /dev/null
+++ b/imperative/python/megengine/functional/math.py
@@ -0,0 +1,696 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections.abc
+import functools
+import math
+import numbers
+from typing import Optional, Sequence, Tuple, Union
+
+from ..core.ops import builtin
+from ..core.ops._internal import param_defs as P
+from ..core.tensor import utils
+from ..core.tensor.core import apply
+from ..tensor import Tensor
+from .elemwise import clamp, exp, log, log1p
+from .tensor import remove_axis, reshape
+
+__all__ = [
+    "all",  # TODO
+    "all_close",  # TODO
+    "any",  # TODO
+    "argmax",
+    "argmin",
+    "argsort",
+    "isinf",
+    "isnan",  # TODO
+    "max",
+    "mean",
+    "median",  # TODO
+    "min",
+    "norm",
+    "normalize",
+    "prod",
+    "sign",  # TODO
+    "sort",
+    "std",
+    "sum",
+    "topk",
+    "unique",  # TODO
+    "var",
+]
+
+
+def all(inp):
+    raise NotImplementedError
+
+
+def all_close(inp):
+    raise NotImplementedError
+
+
+def any(inp):
+    raise NotImplementedError
+
+
+def unique(inp):
+    raise NotImplementedError
+
+
+def isnan(inp: Tensor) -> Tensor:
+    r"""Returns a new tensor representing if each element is NaN or not.
+
+    :param inp: The input tensor.
+    :return: a new tensor representing if each element in :attr:`inp` is NaN or not.
+
+    Examples:
+
+    .. testcode::
+
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor([1, float("nan"), 0])
+
+        print(F.isnan(x))
+
+    .. testoutput::
+
+        Tensor([0 1 0], dtype=uint8)
+
+    """
+    raise NotImplementedError
+    # return (inp != inp).astype("uint8")
+
+
+def isinf(inp: Tensor) -> Tensor:
+    r"""Returns a new tensor representing if each element is Inf or not.
+
+    :param inp: The input tensor.
+    :return: a new tensor representing if each element in :attr:`inp` is Inf or not.
+
+    Examples:
+
+    .. testcode::
+
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor([1, float("inf"), 0])
+
+        print(F.isinf(x))
+
+    .. testoutput::
+
+        Tensor([0 1 0], dtype=uint8)
+
+    """
+    return (abs(inp).astype("float32") == float("inf")).astype("uint8")
+
+
+def sign(inp: Tensor):
+    raise NotImplementedError
+
+
+def _reduce(
+    data,
+    *,
+    mode,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False
+):
+    (data,) = utils.convert_inputs(data)
+    if axis is None:
+        data = data.reshape(-1)
+        assert not keepdims, "cannot set axis=None and keepdims=True"
+
+        op = builtin.Reduce(mode=mode, axis=0)
+        (result,) = apply(op, data)
+    elif isinstance(axis, collections.abc.Iterable):
+        axis = list(axis)
+        axis.sort(reverse=True)
+
+        for ai in axis:
+            op = builtin.Reduce(mode=mode, axis=ai)
+            (data,) = apply(op, data)
+            if not keepdims:
+                data = remove_axis(data, ai)
+        result = data
+    else:
+        op = builtin.Reduce(mode=mode, axis=axis)
+        (result,) = apply(op, data)
+
+        if not keepdims:
+            result = remove_axis(result, axis)
+
+    return result
+
+
+def sum(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    r"""Returns the sum of each row of the ``inp`` tensor in the given ``axis``.
+
+    :param inp: The input tensor.
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced.
+        Default: None
+    :param keepdims: Whether the output tensor has ``axis`` retained or not.
+        Default: False
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data = tensor(np.arange(1, 7, dtype=np.int32).reshape(2, 3))
+        out = F.sum(data)
+        print(out.numpy())
+
+    .. testoutput::
+
+        [21]
+
+    """
+    return _reduce(inp, mode="SUM", axis=axis, keepdims=keepdims)
+
+
+def prod(
+    inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None, keepdims=False
+) -> Tensor:
+    r"""
+    Returns the product of the elements of the input tensor along the given *axis*.
+
+    :param inp: The input tensor
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: ``None``
+    :param keepdims: Whether the output tensor has *axis* retained or not. Default: ``False``
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data = tensor(np.arange(1, 7, dtype=np.int32).reshape(2, 3))
+        out = F.prod(data)
+        print(out.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [720]
+
+    """
+    return _reduce(inp, mode="PRODUCT", axis=axis, keepdims=keepdims)
+
+
+def mean(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    """Returns the mean value of each row of the ``inp`` tensor in
+    the given ``axis``. If axis is a list of dimensions,
+    reduce over all of them.
+
+    :param inp: The input tensor
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: None
+    :param keepdims: Whether the output tensor has ``axis`` retained or not. Default: False
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data = tensor(np.arange(1, 7, dtype=np.int32).reshape(2, 3))
+        out = F.mean(data)
+        print(out.numpy())
+
+    .. testoutput::
+
+        [3.5]
+
+    """
+    return _reduce(inp, mode="MEAN", axis=axis, keepdims=keepdims)
+
+
+def median(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    raise NotImplementedError
+
+
+def var(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    """Returns the variance value of input tensor along
+    given ``axis``. If axis is a list of dimensions,
+    reduce over all of them.
+
+    :param inp: The input tensor.
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: ``None``.
+    :param keepdims: Whether the output tensor has ``axis`` retained or not. Default: ``False``.
+    :return: The output tensor.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3))
+        out = F.var(data)
+        print(out.numpy())
+
+    .. testoutput::
+
+        [2.9166667]
+    """
+    if axis is None:
+        m = mean(inp, axis=axis, keepdims=False)
+    else:
+        m = mean(inp, axis=axis, keepdims=True)
+    v = inp - m
+    return mean(v ** 2, axis=axis, keepdims=keepdims)
+
+
+def std(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    """Returns the standard deviation of input tensor along
+    given ``axis``. If axis is a list of dimensions,
+    reduce over all of them.
+
+    :param inp: The input tensor.
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: ``None``.
+    :param keepdims: Whether the output tensor has ``axis`` retained or not. Default: ``False``.
+    :return: The output tensor.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data = tensor(np.arange(1, 7, dtype=np.float32).reshape(2, 3))
+        out = F.std(data, axis=1)
+        print(out.numpy())
+
+    .. testoutput::
+
+        [0.8164966 0.8164966]
+    """
+    return var(inp, axis=axis, keepdims=keepdims) ** 0.5
+
+
+def min(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    r"""
+    Returns the min value of input tensor along given *axis*.
+
+    :param inp: The input tensor
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: None
+    :param keepdims: Whether the output tensor has *axis* retained or not. Default: False
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(1, 7, dtype=np.int32).reshape(2,3))
+        y = F.min(x)
+        print(y.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [1]
+
+    """
+    return _reduce(inp, mode="MIN", axis=axis, keepdims=keepdims)
+
+
+def max(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    r"""Returns the max value of the input tensor along given *axis*.
+
+    :param inp: The input tensor
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: None
+    :param keepdims: Whether the output tensor has *axis* retained or not. Default: False
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(1, 7, dtype=np.int32).reshape(2,3))
+        y = F.max(x)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [6]
+
+    """
+    return _reduce(inp, mode="MAX", axis=axis, keepdims=keepdims)
+
+
+def norm(
+    inp: Tensor,
+    p: int = 2,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims=False,
+):
+    """Calculate the ``p``-norm of the input tensor along certain axis.
+
+    :param inp: The input tensor
+    :param p: power of value ``p`` applied to ``inp``. Default: 2
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: None
+    :param keepdims: Whether the output tensor has ``axis`` retained or not. Default: False
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(-3, 3, dtype=np.float32).reshape(2,3))
+        y = F.norm(x)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [4.358899]
+
+    """
+    if p == 0:
+        return sum(inp != 0, axis=axis, keepdims=keepdims)
+    if p == math.inf:
+        return max(abs(inp), axis=axis, keepdims=keepdims)
+    if p == -math.inf:
+        return min(abs(inp), axis=axis, keepdims=keepdims)
+    return sum(abs(inp) ** p, axis=axis, keepdims=keepdims) ** (1.0 / p)
+
+
+def argmin(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    r"""Returns the indices of the minimum values along an axis.
+
+    :param inp: The input tensor
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: None
+    :param keepdims: Whether the output tensor has *axis* retained or not. Default: False
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(1, 7, dtype=np.int32).reshape(2,3))
+        y = F.argmin(x)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [0]
+
+    """
+    if isinstance(axis, collections.abc.Iterable):
+        axis = list(axis)
+        axis.sort(reverse=True)
+
+        for ai in axis:
+            op = builtin.Argmin(axis=ai)
+            (inp,) = apply(op, inp)
+
+            if not keepdims:
+                inp = remove_axis(inp, ai)
+
+        return inp
+
+    if axis is None:
+        assert not keepdims, "cannot set axis=None and keepdims=True"
+        inp = inp.flatten()
+        axis = 0
+
+    op = builtin.Argmin(axis=axis)
+    (result,) = apply(op, inp)
+    if not keepdims:
+        result = remove_axis(result, axis)
+    return result
+
+
+def argmax(
+    inp: Tensor,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+) -> Tensor:
+    r"""Returns the indices of the maximum values along an axis.
+
+    :param inp: The input tensor
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced. Default: None
+    :param keepdims: Whether the output tensor has *axis* retained or not. Default: False
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(1, 7, dtype=np.int32).reshape(2,3))
+        y = F.argmax(x)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [5]
+
+    """
+    if isinstance(axis, collections.abc.Iterable):
+        axis = list(axis)
+        axis.sort(reverse=True)
+
+        for ai in axis:
+            op = builtin.Argmax(axis=ai)
+            (inp,) = apply(op, inp)
+
+            if not keepdims:
+                inp = remove_axis(inp, ai)
+
+        return inp
+
+    if axis is None:
+        assert not keepdims, "cannot set axis=None and keepdims=True"
+        inp = inp.flatten()
+        axis = 0
+
+    op = builtin.Argmax(axis=axis)
+    (result,) = apply(op, inp)
+    if not keepdims:
+        result = remove_axis(result, axis)
+    return result
+
+
+def normalize(
+    inp: Tensor,
+    p: int = 2,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    eps: float = 1e-12,
+) -> Tensor:
+    r"""Perform :math:`L_p` normalization of input tensor along certain axis.
+
+    For a tensor :attr:`inp` of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
+    :math:`n_{dim}` -element vector :math:`v` along dimension :attr:`axis` is transformed as:
+
+    .. math::
+        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.
+
+    :param inp: the input tensor
+    :param p: power of value ``p`` applied to ``inp``. Default: 2
+    :param axis: The dimension to reduce. If None, all the dimensions will be reduced
+        to calculate the norm. Default: None
+    :param eps: a small value to avoid division by zero. Default: 1e-12
+    :return: the normalized output tensor
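+
+    Example (an illustrative sketch; values are assumed):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.array([3.0, 4.0], dtype=np.float32))
+        print(F.normalize(x).numpy())  # [0.6 0.8], i.e. x / ||x||_2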
+
+    """
+    if axis is None:
+        return inp / clamp(norm(inp, p, axis), lower=eps)
+    else:
+        return inp / clamp(norm(inp, p, axis, keepdims=True), lower=eps)
+
+
+def argsort(inp: Tensor, descending: bool = False) -> Tensor:
+    r"""
+    Sort the target 2d matrix by row and return the indices that would sort it.
+
+    :param inp: The input tensor; if 2d, each row will be sorted.
+    :param descending: Sort in descending order, where the largest comes first. Default: ``False``
+    :return: The indices tensor of int32; for a 1d input, a 1d tensor of indices.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+        data = tensor(np.array([1,2], dtype=np.float32))
+        indices = F.argsort(data)
+        print(indices.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [0 1]
+
+    """
+    assert len(inp.shape) <= 2, "Input should be 1d or 2d"
+    if descending:
+        order = P.Argsort.Order.DESCENDING
+    else:
+        order = P.Argsort.Order.ASCENDING
+
+    op = builtin.Argsort(order=order)
+    if len(inp.shape) == 1:
+        inp = inp.reshape(1, -1)
+        _, result = apply(op, inp)
+        return result[0]
+    _, result = apply(op, inp)
+    return result
+
+
+def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]:
+    r"""
+    Sort the target 1d/2d tensor by row, returning both the sorted tensor
+    and the sorting indices.
+
+    :param inp: The input tensor; if 2d, each row will be sorted.
+    :param descending: Sort in descending order, where the largest comes first. Default: ``False``
+    :return: Tuple of two tensors (sorted_tensor, indices_of_int32)
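+
+    Example (a minimal sketch; exact printed formatting may differ):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data = tensor(np.array([3, 1, 2], dtype=np.float32))
+        vals, idx = F.sort(data)
+        print(vals.numpy(), idx.numpy())  # [1. 2. 3.] [1 2 0]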
+    """
+    assert len(inp.shape) <= 2, "Input should be 1d or 2d"
+    if descending:
+        order = P.Argsort.Order.DESCENDING
+    else:
+        order = P.Argsort.Order.ASCENDING
+
+    op = builtin.Argsort(order=order)
+    if len(inp.shape) == 1:
+        inp = inp.reshape(1, -1)
+        tns, ind = apply(op, inp)
+        return tns[0], ind[0]
+    tns, ind = apply(op, inp)
+    return tns, ind
+
+
+def topk(
+    inp: Tensor,
+    k: int,
+    descending: bool = False,
+    kth_only: bool = False,
+    no_sort: bool = False,
+) -> Tuple[Tensor, Tensor]:
+    r"""
+    Select the Top-K (by default, the smallest) elements of a 2d matrix by row.
+
+    :param inp: The input tensor; if 2d, each row will be sorted.
+    :param k: The number of elements needed.
+    :param descending: If true, return the largest elements instead. Default: ``False``
+    :param kth_only: If true, only the k-th element will be returned. Default: ``False``
+    :param no_sort: If true, the returned elements can be unordered. Default: ``False``
+    :return: Tuple of two tensors (topk_tensor, indices_of_int32)
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+        data = tensor(np.array([2, 4, 6, 8, 7, 5, 3, 1], dtype=np.float32))
+        top, indices = F.topk(data, 5)
+        print(top.numpy(), indices.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [1. 2. 3. 4. 5.] [7 0 6 1 5]
+
+    """
+    if descending:
+        inp = -inp
+
+    Mode = P.TopK.Mode
+    if kth_only:
+        mode = Mode.KTH_ONLY
+    elif no_sort:
+        mode = Mode.VALUE_IDX_NOSORT
+    else:
+        mode = Mode.VALUE_IDX_SORTED
+    op = builtin.TopK(mode=mode)
+
+    # in KTH_ONLY mode only the values are computed, so ``ind`` stays None
+    ind = None
+    if len(inp.shape) == 1:
+        inp = inp.reshape(1, -1)
+        res = apply(op, inp, Tensor(k, dtype="int32"))
+        if kth_only:
+            tns = res[0]
+        else:
+            tns, ind = res[0][0], res[1][0]
+    else:
+        res = apply(op, inp, Tensor(k, dtype="int32"))
+        if kth_only:
+            tns = res
+        else:
+            tns, ind = res[0], res[1]
+
+    if descending:
+        tns = -tns
+    return tns, ind
diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py
new file mode 100644
index 0000000000000000000000000000000000000000..5596058dd6db47d4ecd7589f2f24407b929eba24
--- /dev/null
+++ b/imperative/python/megengine/functional/nn.py
@@ -0,0 +1,1556 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# pylint: disable=too-many-lines
+from typing import Optional, Sequence, Tuple, Union
+
+from ..core._imperative_rt import CompNode
+from ..core.ops import builtin
+from ..core.ops._internal import param_defs as P
+from ..core.ops.special import Const
+from ..core.tensor import utils
+from ..core.tensor.core import apply
+from ..distributed import WORLD, is_distributed
+from ..random import uniform
+from ..tensor import Tensor
+from .debug_param import get_conv_execution_strategy
+from .distributed import all_reduce_sum
+from .elemwise import exp, floor, log, log1p, maximum, minimum, relu
+from .math import argsort, max, sum
+from .tensor import add_axis, broadcast, concat, full, remove_axis, reshape
+from .types import _pair, _pair_nonzero
+
+__all__ = [
+    "linear",
+    "conv2d",
+    "conv_transpose2d",
+    "local_conv2d",
+    "max_pool2d",
+    "avg_pool2d",
+    "prelu",
+    "leaky_relu",
+    "softplus",
+    "log_softmax",
+    "logsigmoid",
+    "logsumexp",
+    "flatten",
+    "softmax",
+    "batch_norm2d",
+    "sync_batch_norm",
+    "one_hot",
+    "warp_perspective",
+    "matmul",
+    "interpolate",
+    "dropout",
+    "identity",
+    "embedding",
+    "roi_pooling",
+    "roi_align",
+    "assert_equal",
+    "indexing_one_hot",
+    "dot",
+    "svd",
+    "nms",
+    "batched_nms",
+]
+
+
+def expand_hw(x):
+    # NOTE: >1d array is accepted, as long as 1 <= size <= 2
+    try:
+        x = int(x)
+        return [x, x]
+    except (TypeError, ValueError):
+        pass
+    h, w = x
+    return int(h), int(w)
+
+
+def linear(inp: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor:
+    """Applies a linear transformation to the input.
+
+    Refer to :class:`~.module.linear.Linear` for more information.
+
+    :param inp: the input tensor with shape `(N, in_features)`.
+    :param weight: the weight with shape `(out_features, in_features)`.
+    :param bias: the bias with shape `(out_features,)`.
+        Default: ``None``
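+
+    Example (an illustrative sketch with assumed shapes):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.ones((2, 3), dtype=np.float32))
+        w = tensor(np.ones((4, 3), dtype=np.float32))
+        print(F.linear(x, w).numpy().shape)  # (2, 4); every entry equals 3.0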
+    """
+    ret = matmul(inp, weight, transpose_b=True)
+    if bias is not None:
+        ret += bias
+    return ret
+
+
+def conv2d(
+    inp: Tensor,
+    weight: Tensor,
+    bias: Optional[Tensor] = None,
+    stride: Union[int, Tuple[int, int]] = 1,
+    padding: Union[int, Tuple[int, int]] = 0,
+    dilation: Union[int, Tuple[int, int]] = 1,
+    groups: int = 1,
+    conv_mode="CROSS_CORRELATION",
+    compute_mode="DEFAULT",
+) -> Tensor:
+    """2D convolution operation.
+
+    Refer to :class:`~.Conv2d` for more information.
+
+    :param inp: The feature map of the convolution operation
+    :param weight: The convolution kernel
+    :param bias: The bias added to the result of convolution (if given)
+    :param stride: Stride of the 2D convolution operation. Default: 1
+    :param padding: Size of the paddings added to the input on both sides of its
+        spatial dimensions. Only zero-padding is supported. Default: 0
+    :param dilation: Dilation of the 2D convolution operation. Default: 1
+    :param groups: number of groups to divide input and output channels into,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
+        ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
+        and the shape of weight should be ``(groups, out_channel // groups,
+        in_channels // groups, height, width)``.
+    :type conv_mode: string or :class:`P.Convolution.Mode`
+    :param conv_mode: Supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default:
+        'CROSS_CORRELATION'.
+    :type compute_mode: string or
+        :class:`P.Convolution.ComputeMode`
+    :param compute_mode: When set to 'DEFAULT', no special requirements will be
+        placed on the precision of intermediate results. When set to 'FLOAT32',
+        Float32 would be used for accumulator and intermediate result, but only
+        effective when input and output are of Float16 dtype.
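+
+    Example (an illustrative sketch with assumed shapes):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        inp = tensor(np.ones((1, 3, 8, 8), dtype=np.float32))      # NCHW feature map
+        weight = tensor(np.ones((16, 3, 3, 3), dtype=np.float32))  # OIHW kernel
+        out = F.conv2d(inp, weight, stride=1, padding=1)
+        print(out.numpy().shape)  # (1, 16, 8, 8)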
+
+    """
+    assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION"
+    assert compute_mode == "DEFAULT" or compute_mode.name == "DEFAULT"
+
+    stride_h, stride_w = expand_hw(stride)
+    pad_h, pad_w = expand_hw(padding)
+    dilate_h, dilate_w = expand_hw(dilation)
+
+    Sparse = P.Convolution.Sparse
+    sparse_type = Sparse.DENSE if groups == 1 else Sparse.GROUP
+    op = builtin.Convolution(
+        stride_h=stride_h,
+        stride_w=stride_w,
+        pad_h=pad_h,
+        pad_w=pad_w,
+        dilate_h=dilate_h,
+        dilate_w=dilate_w,
+        strategy=get_conv_execution_strategy(),
+        mode=conv_mode,
+        compute_mode=compute_mode,
+        sparse=sparse_type,
+    )
+    (output,) = apply(op, inp, weight)
+    if bias is not None:
+        output += bias
+    return output
+
+
+def conv_transpose2d(
+    inp: Tensor,
+    weight: Tensor,
+    bias: Optional[Tensor] = None,
+    stride: Union[int, Tuple[int, int]] = 1,
+    padding: Union[int, Tuple[int, int]] = 0,
+    dilation: Union[int, Tuple[int, int]] = 1,
+    groups: int = 1,
+    conv_mode="CROSS_CORRELATION",
+    compute_mode="DEFAULT",
+) -> Tensor:
+    """2D transposed convolution operation.
+
+    Refer to :class:`~.ConvTranspose2d` for more information.
+
+    :param inp: The feature map of the convolution operation
+    :param weight: The convolution kernel
+    :param bias: The bias added to the result of convolution (if given)
+    :param stride: Stride of the 2D convolution operation. Default: 1
+    :param padding: Size of the paddings added to the input on both sides of its
+        spatial dimensions. Only zero-padding is supported. Default: 0
+    :param dilation: Dilation of the 2D convolution operation. Default: 1
+    :param groups: number of groups to divide input and output channels into,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
+        ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
+        and the shape of weight should be ``(groups, out_channel // groups,
+        in_channels // groups, height, width)``. Default: 1
+    :type conv_mode: string or :class:`P.Convolution.Mode`
+    :param conv_mode: Supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default:
+        'CROSS_CORRELATION'.
+    :type compute_mode: string or
+        :class:`P.Convolution.ComputeMode`
+    :param compute_mode: When set to 'DEFAULT', no special requirements will be
+        placed on the precision of intermediate results. When set to 'FLOAT32',
+        Float32 would be used for accumulator and intermediate result, but only
+        effective when input and output are of Float16 dtype.
+
+    """
+    assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION"
+    assert compute_mode == "DEFAULT" or compute_mode.name == "DEFAULT"
+
+    if groups != 1:
+        raise NotImplementedError("TODO")
+
+    stride_h, stride_w = expand_hw(stride)
+    pad_h, pad_w = expand_hw(padding)
+    dilate_h, dilate_w = expand_hw(dilation)
+
+    op = builtin.ConvolutionBackwardData(
+        stride_h=stride_h,
+        stride_w=stride_w,
+        pad_h=pad_h,
+        pad_w=pad_w,
+        dilate_h=dilate_h,
+        dilate_w=dilate_w,
+        strategy=get_conv_execution_strategy(),
+    )
+    (output,) = apply(op, inp, weight)
+    if bias is not None:
+        output += bias
+    return output
+
+
+def local_conv2d(
+    inp: Tensor,
+    weight: Tensor,
+    bias: Optional[Tensor] = None,
+    stride: Union[int, Tuple[int, int]] = 1,
+    padding: Union[int, Tuple[int, int]] = 0,
+    dilation: Union[int, Tuple[int, int]] = 1,
+    conv_mode="CROSS_CORRELATION",
+) -> Tensor:
+    """Applies spatial 2D convolution over an image with untied kernels.
+
+    Refer to :class:`~.LocalConv2d` for more information.
+    """
+    assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION"
+
+    stride_h, stride_w = expand_hw(stride)
+    pad_h, pad_w = expand_hw(padding)
+    dilate_h, dilate_w = expand_hw(dilation)
+
+    op = builtin.GroupLocal(
+        stride_h=stride_h,
+        stride_w=stride_w,
+        pad_h=pad_h,
+        pad_w=pad_w,
+        dilate_h=dilate_h,
+        dilate_w=dilate_w,
+        strategy=get_conv_execution_strategy(),
+    )
+    (output,) = apply(op, inp, weight)
+    if bias is not None:
+        output += bias
+    return output
+
+
+def max_pool2d(
+    inp: Tensor,
+    kernel_size: Union[int, Tuple[int, int]],
+    stride: Optional[Union[int, Tuple[int, int]]] = None,
+    padding: Union[int, Tuple[int, int]] = 0,
+) -> Tensor:
+    """Applies a 2D max pooling over an input.
+
+    Refer to :class:`~.MaxPool2d` for more information.
+
+    :param inp: The input tensor.
+    :param kernel_size: The size of the window.
+    :param stride: The stride of the window. If not provided, its value is set to ``kernel_size``.
+        Default: None
+    :param padding: Implicit zero padding to be added on both sides. Default: 0
+
+    """
+    if stride is None:
+        stride = kernel_size
+    window_h, window_w = _pair_nonzero(kernel_size)
+    stride_h, stride_w = _pair_nonzero(stride)
+    padding_h, padding_w = _pair(padding)
+
+    op = builtin.Pooling(
+        window_h=window_h,
+        window_w=window_w,
+        stride_h=stride_h,
+        stride_w=stride_w,
+        pad_h=padding_h,
+        pad_w=padding_w,
+        mode="MAX",
+    )
+    (output,) = apply(op, inp)
+    return output
+
+
+def avg_pool2d(
+    inp: Tensor,
+    kernel_size: Union[int, Tuple[int, int]],
+    stride: Optional[Union[int, Tuple[int, int]]] = None,
+    padding: Union[int, Tuple[int, int]] = 0,
+    mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING",
+) -> Tensor:
+    """Applies a 2D average pooling over an input.
+
+    Refer to :class:`~.AvgPool2d` for more information.
+
+    :param inp: The input tensor.
+    :param kernel_size: The size of the window.
+    :param stride: The stride of the window. If not provided, its value is set to ``kernel_size``.
+        Default: None
+    :param padding: Implicit zero padding to be added on both sides. Default: 0
+    :param mode: Whether to count padding values. Default: "AVERAGE_COUNT_EXCLUDE_PADDING"
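+
+    Example (an illustrative sketch with assumed shapes):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        inp = tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
+        out = F.avg_pool2d(inp, kernel_size=2, stride=2)
+        print(out.numpy())  # [[[[ 2.5  4.5] [10.5 12.5]]]]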
+
+    """
+    if stride is None:
+        stride = kernel_size
+    window_h, window_w = _pair_nonzero(kernel_size)
+    stride_h, stride_w = _pair_nonzero(stride)
+    padding_h, padding_w = _pair(padding)
+
+    op = builtin.Pooling(
+        window_h=window_h,
+        window_w=window_w,
+        stride_h=stride_h,
+        stride_w=stride_w,
+        pad_h=padding_h,
+        pad_w=padding_w,
+        mode=mode,
+    )
+    (output,) = apply(op, inp)
+    return output
+
+
+def prelu(inp: Tensor, weight: Tensor) -> Tensor:
+    r"""
+    Applies the element-wise PReLU function.
+
+    Refer to :class:`~.PReLU` for more information.
+    """
+    return maximum(inp, 0) + weight * minimum(inp, 0)
+
+
+def leaky_relu(inp: Tensor, negative_slope: float = 0.01) -> Tensor:
+    r"""
+    Applies the element-wise LeakyReLU function.
+
+    Refer to :class:`~.LeakyReLU` for more information.
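+
+    Example (a minimal sketch):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.array([-2.0, 0.0, 2.0], dtype=np.float32))
+        print(F.leaky_relu(x, 0.01).numpy())  # [-0.02  0.    2.  ]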
+    """
+    return maximum(inp, 0) + negative_slope * minimum(inp, 0)
+
+
+def softplus(inp: Tensor) -> Tensor:
+    r"""Applies the element-wise function:
+
+    .. math::
+        \text{softplus}(x) = \log(1 + \exp(x))
+
+    softplus is a smooth approximation to the ReLU function and can be used
+    to constrain the output of a machine to always be positive.
+    For numerical stability the implementation follows this transformation:
+
+    .. math::
+        \text{softplus}(x) = \log(1 + \exp(x))
+                           = \log(1 + \exp(-\text{abs}(x))) + \max(x, 0)
+                           = \log1p(\exp(-\text{abs}(x))) + \text{relu}(x)
+
+    :param inp: The input tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(-3, 3, dtype=np.float32))
+        y = F.softplus(x)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [0.04858735 0.126928   0.3132617  0.6931472  1.3132617  2.126928  ]
+
+    """
+    return log1p(exp(-abs(inp))) + relu(inp)
+
+
+def log_softmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
+    r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional
+    input Tensor. The LogSoftmax formulation can be simplified as:
+
+    .. math::
+        \text{LogSoftmax}(x_{i}) = \log(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} )
+
+    For numerical stability the implementation follows this transformation:
+
+    .. math::
+        \operatorname{logsoftmax}(x)
+        = \log (\frac{\exp (x)}{\sum_{i}(\exp (x_{i}))})
+        = x - \log (\sum_{i}(\exp (x_{i})))
+        = x - \operatorname{logsumexp}(x)
+
+    :param inp: The input tensor
+    :param axis: An axis along which log_softmax will be applied.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5)
+        y = F.log_softmax(x, axis=1)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [[-4.4519143 -3.4519143 -2.4519143 -1.4519144 -0.4519144]
+         [-4.4519143 -3.4519143 -2.4519143 -1.4519144 -0.4519144]]
+
+    """
+    return inp - logsumexp(inp, axis, keepdims=True)
+
+
+def logsigmoid(inp: Tensor) -> Tensor:
+    r"""Applies the element-wise function:
+
+    .. math::
+        \text{logsigmoid}(x) = \log(\frac{ 1 }{ 1 + \exp(-x)})
+        = \log(1/(1 + \exp(-x)))
+        = - \log(1 + \exp(-x))
+        = - \text{softplus}(-x)
+
+    :param inp: The input tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(-5, 5, dtype=np.float32))
+        y = F.logsigmoid(x)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [-5.0067153  -4.01815    -3.0485873  -2.126928   -1.3132617  -0.6931472
+         -0.3132617  -0.126928   -0.04858735 -0.01814993]
+
+    """
+    return -softplus(-inp)
+
+
+def logsumexp(
+    inp: Tensor, axis: Union[int, Sequence[int]], keepdims: bool = False
+) -> Tensor:
+    r"""
+    Compute the log of the sum of exponentials of inputs along the given :attr:`axis`.
+    The computation is numerically stabilized.
+
+    .. math::
+
+        \operatorname{logsumexp}(\boldsymbol{x}) = \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
+
+    For numerical stability, the implementation follows this transformation:
+
+    .. math::
+
+        \operatorname{logsumexp}(\boldsymbol{x}) = b + \log \sum_{j=1}^{n} \exp \left(x_{j}-b\right)
+
+    where
+
+    .. math::
+        b = \max(x_j)
+
+    :param inp: The input tensor.
+    :param axis: Axis over which the sum is taken. It can be a single axis or a list of axes.
+    :param keepdims: whether to retain :attr:`axis` or not for the output tensor.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5)
+        y = F.logsumexp(x, axis=1, keepdims=False)
+        print(y.numpy())
+
+    .. testoutput::
+
+        [-0.5480856  4.4519143]
+
+    """
+    max_value = max(inp, axis, keepdims=True)
+    if keepdims:
+        return max_value + log(sum(exp(inp - max_value), axis, keepdims))
+    else:
+        return remove_axis(max_value, axis=None) + log(
+            sum(exp(inp - max_value), axis, keepdims)
+        )
+
+
+def flatten(inp: Tensor, start_axis: int = 0, end_axis: int = -1) -> Tensor:
+    r"""
+    Reshapes the tensor by flattening the sub-tensor from dimension ``start_axis`` to dimension ``end_axis``.
+
+    :param inp: The input tensor.
+    :param start_axis: The start dimension of the sub-tensor to be flattened. Default: 0
+    :param end_axis: The end dimension of the sub-tensor to be flattened. Default: -1
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        inp_shape = (2, 2, 3, 3)
+        inp = tensor(
+            np.arange(36, dtype=np.int32).reshape(inp_shape),
+        )
+        oup = F.flatten(inp, 2)
+        print(inp.numpy().shape)
+        print(oup.numpy().shape)
+
+    Outputs:
+
+    .. testoutput::
+
+        (2, 2, 3, 3)
+        (2, 2, 9)
+
+    """
+    target_shape = tuple(inp.shape[i] for i in range(start_axis)) + (-1,)
+    if end_axis != -1:
+        target_shape += (*inp.shape[end_axis + 1 :],)
+    return inp.reshape(*target_shape)
+
+
+def _get_softmax_axis(ndim: int) -> int:
+    if ndim in (0, 1, 3):
+        return 0
+    return 1
+
+
+def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
+    r"""
+    Applies a softmax function. Softmax is defined as:
+
+    .. math::
+        \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
+
+    It is applied to all elements along axis, and will re-scale them so that
+    the elements lie in the range `[0, 1]` and sum to 1.
+
+    See :class:`~megengine.module.activation.Softmax` for more details.
+
+    :param inp: The input tensor.
+    :param axis: An axis along which softmax will be applied. By default,
+        softmax will apply along the highest ranked axis.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5)
+        out = F.softmax(x)
+        print(out.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [[0.01165623 0.03168492 0.08612854 0.23412167 0.6364086 ]
+         [0.01165623 0.03168492 0.08612854 0.23412167 0.6364086 ]]
+
+    """
+    if axis is None:
+        axis = _get_softmax_axis(len(inp.shape))
+    offset = inp.max(axis=axis).detach()
+    cached = exp(inp - offset)
+    down = sum(cached, axis=axis, keepdims=True)
+    return cached / down
+
+
+def batch_norm2d(
+    data: Tensor,
+    running_mean: Tensor = None,
+    running_var: Tensor = None,
+    weight: Optional[Tensor] = None,
+    bias: Optional[Tensor] = None,
+    *,
+    training: bool = False,
+    momentum: float = 0.9,
+    eps: float = 1e-5,
+    inplace: bool = True
+):
+    r"""Applies batch normalization to the input.
+
+    Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information.
+
+    :param data: input tensor.
+    :param running_mean: tensor to store running mean.
+    :param running_var: tensor to store running variance.
+    :param weight: scaling tensor in the learnable affine parameters.
+        See :math:`\gamma` in :class:`~.BatchNorm2d`
+    :param bias: bias tensor in the learnable affine parameters.
+        See :math:`\beta` in :class:`~.BatchNorm2d`
+    :param training: a boolean value to indicate whether batch norm is performed
+        in training mode. Default: ``False``
+    :param momentum: the value used for the ``running_mean`` and ``running_var``
+        computation.
+        Default: 0.9
+    :param eps: a value added to the denominator for numerical stability.
+        Default: 1e-5.
+    :param inplace: whether to update ``running_mean`` and ``running_var`` inplace or return new tensors.
+        Default: True
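+
+    Example (an illustrative inference-mode sketch; shapes and values are assumed):
+
+    .. code-block:: python
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data = tensor(np.random.normal(size=(1, 3, 4, 4)).astype(np.float32))
+        mean = tensor(np.zeros((1, 3, 1, 1), dtype=np.float32))
+        var = tensor(np.ones((1, 3, 1, 1), dtype=np.float32))
+        out = F.batch_norm2d(data, mean, var, training=False)
+        # with zero mean and unit variance this is close to the identity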
+
+    """
+    from .tensor import expand_dims, squeeze, broadcast
+
+    def full(value):
+        N, C, H, W = data.shape
+        (x,) = Const(value, dtype=data.dtype, device=data.device)(data)
+        return broadcast(x, [1, C, 1, 1])
+
+    def expand_or_full(x, value):
+        if x is None:
+            return full(value)
+        return expand_dims(x, [0, 2, 3])
+
+    def make_full_if_none(x, value):
+        if x is None:
+            return full(value)
+        return x
+
+    has_mean = running_mean is not None
+    has_var = running_var is not None
+
+    if not training:
+        assert has_mean, "running_mean must be provided in inference mode"
+        assert has_var, "running_var must be provided in inference mode"
+
+    if has_mean and running_mean.ndim != 4:
+        raise ValueError
+    if has_var and running_var.ndim != 4:
+        raise ValueError
+
+    data, weight, bias, running_mean, running_var = utils.convert_inputs(
+        data, weight, bias, running_mean, running_var
+    )
+
+    weight = expand_or_full(weight, 1)
+    bias = expand_or_full(bias, 0)
+
+    if not training:
+        op = builtin.BatchNorm(fwd_mode="INFERENCE", epsilon=eps, param_dim="DIM_1C11")
+        ret = apply(op, data, weight, bias, running_mean, running_var)[-1]
+        return ret
+
+    else:
+        op = builtin.BatchNorm(
+            avg_factor=1 - momentum, epsilon=eps, param_dim="DIM_1C11"
+        )
+
+        if has_mean or has_var:
+            running_mean = make_full_if_none(running_mean, 0)
+            running_var = make_full_if_none(running_var, 1)
+            new_mean, new_var, _, _, data = apply(
+                op, data, weight, bias, running_mean, running_var
+            )
+            if not has_mean:
+                new_mean = None
+            if not has_var:
+                new_var = None
+
+            if inplace:
+                if has_mean:
+                    running_mean[...] = new_mean
+                if has_var:
+                    running_var[...] = new_var
+
+                return data
+            else:
+                return data, new_mean, new_var
+        else:
+            _, _, data, = apply(op, data, weight, bias)
+            return data
+
+
+def sync_batch_norm(
+    input: Tensor,
+    running_mean: Tensor,
+    running_var: Tensor,
+    weight: Optional[Tensor] = None,
+    bias: Optional[Tensor] = None,
+    training: bool = False,
+    momentum: Union[float, Tensor] = 0.9,
+    eps: float = 1e-5,
+    eps_mode="ADDITIVE",
+    group=WORLD,
+) -> Tensor:
+    r"""Applies synchronized batch normalization to the input.
+
+    Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information.
+
+    :param input: input tensor.
+    :param running_mean: tensor to store running mean.
+    :param running_var: tensor to store running variance.
+    :param weight: scaling tensor in the learnable affine parameters.
+        See :math:`\gamma` in :class:`~.BatchNorm2d`
+    :param bias: bias tensor in the learnable affine parameters.
+        See :math:`\beta` in :class:`~.BatchNorm2d`
+    :param training: a boolean value to indicate whether batch norm is performed
+        in training mode. Default: ``False``
+    :param momentum: the value used for the ``running_mean`` and ``running_var``
+        computation.
+        Default: 0.9
+    :param eps: a value added to the denominator for numerical stability.
+        Default: 1e-5.
+    """
+    assert eps_mode in {"MAX", "ADDITIVE"}, "unknown eps_mode: {}".format(eps_mode)
+    _channels = input.shape[1]
+    _ndim = len(input.shape)
+    _param_shape = (1, _channels) + (1,) * (_ndim - 2)
+
+    if training:
+
+        def _sum_on_channel(input):
+            return apply(builtin.Reduce(mode="SUM"), input, Tensor(_param_shape))[0]
+
+        reduce_size = input.shape[0]
+        for i in range(2, _ndim):
+            reduce_size = reduce_size * input.shape[i]
+        channel_x1s = _sum_on_channel(input)
+        channel_x2s = _sum_on_channel(input ** 2)
+
+        if is_distributed():
+            # reduce all nodes' data to calculate mean and variance
+            reduce_size = full([1 for _ in range(_ndim)], reduce_size)
+            stat = concat([reduce_size, channel_x1s, channel_x2s], axis=1)
+            stat = all_reduce_sum(stat, group)
+            reduce_size = stat[:, :1].reshape(1)
+            channel_x1s = stat[:, 1 : 1 + _channels]
+            channel_x2s = stat[:, 1 + _channels :]
+
+        channel_mean = channel_x1s / reduce_size
+        channel_variance = (
+            channel_x1s ** 2 / (-reduce_size * reduce_size) + channel_x2s / reduce_size
+        )
+    else:
+        assert running_var is not None and running_mean is not None
+        channel_variance = running_var.reshape(*_param_shape)
+        channel_mean = running_mean.reshape(*_param_shape)
+
+    invsqrt_channel_variance = (
+        maximum(channel_variance, eps) if eps_mode == "MAX" else channel_variance + eps
+    ) ** -0.5
+
+    if weight is not None:
+        weight = weight.reshape(*_param_shape)
+    if bias is not None:
+        bias = bias.reshape(*_param_shape)
+
+    # outvar = output * weight + bias
+    # where output = input * invsqrt_channel_variance + (
+    #     -channel_mean * invsqrt_channel_variance
+    # )
+    # Manually expand output for gopt
+
+    if weight is not None:
+        inv_var_wt = invsqrt_channel_variance * weight
+        neg_channel_mean = -channel_mean
+        if bias is not None:
+            outvar = input * inv_var_wt + (neg_channel_mean * inv_var_wt + bias)
+        else:
+            outvar = input * inv_var_wt + neg_channel_mean * inv_var_wt
+    else:
+        outvar = input * invsqrt_channel_variance + (
+            -channel_mean * invsqrt_channel_variance
+        )
+        if bias is not None:
+            outvar = outvar + bias
+
+    if training and running_var is not None and running_mean is not None:
+        running_mean *= momentum
+        running_mean += (1 - momentum) * channel_mean
+        channel_variance_unbiased = channel_x1s ** 2 / (
+            -reduce_size * (reduce_size - 1)
+        ) + channel_x2s / (reduce_size - 1)
+        running_var *= momentum
+        running_var += (1 - momentum) * channel_variance_unbiased
+
+    return outvar
+
+
+def one_hot(inp: Tensor, num_classes: int) -> Tensor:
+    r"""
+    Perform one-hot encoding for the input tensor.
+
+    :param inp: input tensor
+    :param num_classes: number of classes denotes the last dimension of the output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        inp = tensor(np.arange(1, 4, dtype=np.int32))
+        out = F.one_hot(inp, num_classes=4)
+        print(out.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [[0 1 0 0]
+         [0 0 1 0]
+         [0 0 0 1]]
+
+    """
+    raise NotImplementedError
+    # comp_node, comp_graph = _decide_comp_node_and_comp_graph(inp)
+
+    # zeros = mgb.make_immutable(value=0, comp_node=comp_node, comp_graph=comp_graph)
+    # zeros_symvar = zeros.broadcast(inp.shapeof(), num_classes)
+
+    # ones = mgb.make_immutable(value=1, comp_node=comp_node, comp_graph=comp_graph)
+    # ones_symvar = ones.broadcast(inp.shapeof(), 1)
+
+    # return Tensor(
+    #     mgb.opr.indexing_set_one_hot(
+    #         zeros_symvar, axis=len(inp.shapeof()), index=inp, value=ones_symvar
+    #     )
+    # )
+
+
+def warp_perspective(
+    inp: Tensor,
+    M: Tensor,
+    dsize: Union[Tuple[int, int], int, Tensor],
+    border_mode: str = "REPLICATE",
+    border_val: float = 0.0,
+    interp_mode: str = "LINEAR",
+):
+    r"""
+    Applies perspective transformation to batched 2D images.
+
+    The input images are transformed to the output images by the transformation matrix:
+
+    .. math::
+        \text{output}(n, c, h, w) = \text{input} \left( n, c,
+            \frac{M_{00}h + M_{01}w + M_{02}}{M_{20}h + M_{21}w + M_{22}},
+            \frac{M_{10}h + M_{11}w + M_{12}}{M_{20}h + M_{21}w + M_{22}}
+        \right)
+
+    :param inp: input image
+    :param M: (batch, 3, 3) transformation matrix
+    :param dsize: (h, w) size of the output image
+    :param border_mode: pixel extrapolation method. Default: ``"REPLICATE"``
+    :param border_val: value used in case of a constant border. Default: ``0``
+    :param interp_mode: interpolation methods. Default: ``"LINEAR"``
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+        inp_shape = (1, 1, 4, 4)
+        inp = tensor(np.arange(16, dtype=np.float32).reshape(inp_shape))
+        M_shape = (1, 3, 3)
+        # M defines a translation: dst(1, 1, h, w) = src(1, 1, h+1, w+1)
+        M = tensor(np.array([[1., 0., 1.],
+                             [0., 1., 1.],
+                             [0., 0., 1.]], dtype=np.float32).reshape(M_shape))
+        out = F.warp_perspective(inp, M, (2, 2))
+        print(out.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [[[[ 5.  6.]
+           [ 9. 10.]]]]
+
+    """
+    op = builtin.WarpPerspective(
+        imode=interp_mode, bmode=border_mode, format="NCHW", border_val=border_val
+    )
+    (result,) = apply(op, inp, M, Tensor(dsize))
+    return result
+
+
+def matmul(
+    inp1: Tensor,
+    inp2: Tensor,
+    transpose_a=False,
+    transpose_b=False,
+    compute_mode="DEFAULT",
+    format="DEFAULT",
+) -> Tensor:
+    """
+    Performs a matrix multiplication of the matrices ``inp1`` and ``inp2``.
+
+    Depending on the input dimensions, this function behaves differently:
+
+    - Both 1-D tensors: simply forward to dot.
+    - Both 2-D tensors: normal matrix multiplication.
+    - If one input tensor is 1-D: matrix-vector multiplication.
+    - If at least one tensor is 3-dimensional or higher, a batched matrix-matrix
+      product is returned, and the tensor with fewer dimensions will
+      be broadcast accordingly.
For example:
+
+        - inp1: `(k, m)`, inp2: `(m, p)`, return: `(k, p)`
+        - inp1: `(n, k, m)`, inp2: `(n, m, p)`, return: `(n, k, p)`
+        - inp1: `(n, k, m)`, inp2: `(m, p)`, return: `(n, k, p)`
+        - inp1: `(n, j, k, m)`, inp2: `(n, j, m, p)`, return: `(n, j, k, p)`
+
+    :param inp1: The first matrix to be multiplied
+    :param inp2: The second matrix to be multiplied
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data1 = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
+        data2 = tensor(np.arange(0, 6, dtype=np.float32).reshape(3, 2))
+        out = F.matmul(data1, data2)
+        print(out.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [[10. 13.]
+         [28. 40.]]
+
+    """
+    inp1, inp2 = utils.convert_inputs(inp1, inp2)
+    dim1, dim2 = inp1.ndim, inp2.ndim
+    if dim1 == 1 and dim2 == 1:
+        return dot(inp1, inp2)
+
+    shp = None
+    if dim1 > 3 or dim2 > 3:
+        shape1, shape2 = list(inp1.shape), list(inp2.shape)
+        if dim1 != dim2:
+            if dim1 < dim2:
+                shape1 = shape2[: dim2 - dim1] + shape1
+                inp1 = inp1.broadcast(*shape1)
+            else:
+                shape2 = shape1[: dim1 - dim2] + shape2
+                inp2 = inp2.broadcast(*shape2)
+        reshaped_batch_size = 1
+        for i in shape1[:-2]:
+            reshaped_batch_size *= i
+        inp1 = inp1.reshape(*([reshaped_batch_size] + shape1[-2:]))
+        inp2 = inp2.reshape(*([reshaped_batch_size] + shape2[-2:]))
+        op = builtin.BatchedMatrixMul(
+            transposeA=transpose_a,
+            transposeB=transpose_b,
+            compute_mode=compute_mode,
+            format=format,
+        )
+        shp = shape1[:-1] + shape2[-1:]
+    elif dim1 == 3 or dim2 == 3:
+        if dim2 < 3:
+            inp2 = inp2.broadcast(*(inp1.shape[:1] + inp2.shape))
+        elif dim1 < 3:
+            inp1 = inp1.broadcast(*(inp2.shape[:1] + inp1.shape))
+        op = builtin.BatchedMatrixMul(
+            transposeA=transpose_a,
+            transposeB=transpose_b,
+            compute_mode=compute_mode,
+            format=format,
+        )
+    else:
+        if dim1 == 1:
+            shp = (inp2.shape[1],)
+            inp1 = add_axis(inp1, 0)
+        if dim2 == 1:
+            shp = (inp1.shape[0],)
+            inp2 = add_axis(inp2, 1)
+        op = builtin.MatrixMul(
+            transposeA=transpose_a,
+            transposeB=transpose_b,
+            compute_mode=compute_mode,
+            format=format,
+        )
+
+    (result,) = apply(op, inp1, inp2)
+    if shp is not None:
+        result = result.reshape(shp)
+    return result
+
+
+def dot(inp1: Tensor, inp2: Tensor) -> Tensor:
+    """
+    Computes the dot product of two vectors ``inp1`` and ``inp2``.
+    Inputs must be 1-dimensional; a scalar input is automatically broadcast.
+
+    :param inp1: The first vector
+    :param inp2: The second vector
+    :return: The output value
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        data1 = tensor(np.arange(0, 6, dtype=np.float32))
+        data2 = tensor(np.arange(0, 6, dtype=np.float32))
+        out = F.dot(data1, data2)
+        print(out.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [55.]
+
+    """
+    op = builtin.Dot()
+    inp1, inp2 = utils.convert_inputs(inp1, inp2)
+    (result,) = apply(op, inp1, inp2)
+    return result
+
+
+def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor:
+    """
+    Computes the singular value decomposition of input matrix ``inp``.
+
+    :param inp: The input matrix, must have shape ``[..., M, N]``
+    :return: The output matrices, U, sigma, V
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+
+        x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2,3))
+        _, y, _ = F.svd(x)
+        print(y.numpy())
+
+    Outputs:
+
+        [7.348, 1.]
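The bracketed values above can be cross-checked against NumPy; a minimal sanity-check sketch, assuming ``F.svd`` agrees with ``np.linalg.svd`` on this input (as the docstring example implies):

```python
import numpy as np
from megengine import tensor
import megengine.functional as F

x_np = np.arange(0, 6, dtype=np.float32).reshape(2, 3)
_, sigma, _ = F.svd(tensor(x_np))
# np.linalg.svd also returns singular values in descending order
np.testing.assert_allclose(
    sigma.numpy(), np.linalg.svd(x_np, compute_uv=False), rtol=1e-3
)
```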
+ + """ + op = builtin.SVD(full_matrices=full_matrices, compute_uv=compute_uv) + U, sigma, V = apply(op, inp) + return U, sigma, V + + +def interpolate( + inp: Tensor, + size: Optional[Union[int, Tuple[int, int]]] = None, + scale_factor: Optional[Union[float, Tuple[float, float]]] = None, + mode: str = "BILINEAR", + align_corners: bool = None, +) -> Tensor: + r""" + Down/up samples the input tensor to either the given :attr:`size` or the given + :attr:`scale_factor` + + :param inp: input tensor + :param size: size of the output tensor. Default: ``None`` + :param scale_factor: scaling factor of the output tensor. Default: ``None`` + :param mode: interpolation methods, acceptable values are: + 'BILINEAR', 'LINEAR'. Default: ``BILINEAR`` + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + from megengine.test import assertTensorClose + + inp = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) + out = F.interpolate(inp, [4, 4], align_corners=False) + print(out.numpy()) + + out2 = F.interpolate(inp, scale_factor=2.) + assertTensorClose(out.numpy(), out2.numpy()) + + Outputs: + + .. testoutput:: + + [[[[1. 1.25 1.75 2. ] + [1.5 1.75 2.25 2.5 ] + [2.5 2.75 3.25 3.5 ] + [3. 3.25 3.75 4. ]]]] + + """ + mode = mode.upper() + if mode not in ["BILINEAR", "LINEAR"]: + raise ValueError("interpolate only support linear or bilinear mode") + if mode not in ["BILINEAR", "LINEAR"]: + if align_corners is not None: + raise ValueError( + "align_corners option can only be set in the bilinear/linear interpolating mode" + ) + else: + if align_corners is None: + align_corners = False + + if mode == "LINEAR": + inp = add_axis(inp, 3) + + if len(inp.shape) != 4: + raise ValueError("shape of input tensor must correspond to the operartion mode") + + if size is None: + if scale_factor is None: + raise ValueError("scale_factor must not be None when size is None") + + if isinstance(scale_factor, (float, int)): + scale_factor = float(scale_factor) + if mode == "LINEAR": + scale_factor = (scale_factor, float(1)) + else: + scale_factor = (scale_factor, scale_factor) + else: + if mode == "LINEAR": + raise ValueError( + "under LINEAR mode, scale_factor can only be single value" + ) + + assert len(scale_factor) == 2, "shape of scale_factor must be equal to (2, )" + assert isinstance(scale_factor[0], float) and isinstance( + scale_factor[1], float + ), "scale_factor must be float type" + dsize = tuple( + floor( + Tensor( + inp.shape[i + 2] * scale_factor[i], + dtype="float32", + device=inp.device, + ) + ) + for i in range(2) + ) + dsize = concat([dsize[0], dsize[1]], axis=0) + else: + if scale_factor is not None: + raise ValueError("scale_factor must be None when size is provided") + + if isinstance(size, int): + size = (size, 1) + else: + if mode == "LINEAR": + raise ValueError("under LINEAR mode, size can only be single value") + dsize = size + + oh, ow = dsize[0], dsize[1] + ih, iw = inp.shape[2], inp.shape[3] + + if align_corners: + hscale = (ih - 1.0) / (oh - 1.0) + wscale = 1.0 * iw / ow + if mode != "LINEAR": + wscale = (iw - 1.0) / (ow - 1.0) + row0 = concat( + [wscale, Tensor([0, 0], dtype="float32", device=inp.device)], axis=0 + ).reshape(1, 3) + row1 = concat( + [ + Tensor(0, dtype="float32", device=inp.device), + hscale, + Tensor(0, dtype="float32", device=inp.device), + ], + axis=0, + ).reshape(1, 3) + weight = concat( + [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], + axis=0, + ).reshape(1, 3, 3) + weight = 
broadcast(weight, (inp.shape[0], 3, 3)) + else: + hscale = 1.0 * ih / oh + wscale = 1.0 * iw / ow + row0 = concat( + [wscale, Tensor(0, dtype="float32", device=inp.device), 0.5 * wscale - 0.5], + axis=0, + ).reshape(1, 3) + row1 = concat( + [Tensor(0, dtype="float32", device=inp.device), hscale, 0.5 * hscale - 0.5], + axis=0, + ).reshape(1, 3) + weight = concat( + [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], + axis=0, + ).reshape(1, 3, 3) + weight = broadcast(weight, (inp.shape[0], 3, 3)) + + weight = weight.astype("float32") + ret = warp_perspective(inp, weight, dsize, interp_mode="LINEAR") + if mode == "LINEAR": + ret = reshape(ret, ret.shape[0:3]) + return ret + + +def dropout(inp: Tensor, drop_prob: float, rescale: bool = True) -> Tensor: + """ + Returns a new tensor where each of the elements are randomly set to zero + with probability P = ``drop_prob``. Optionally rescale the output tensor. + + :param inp: The input tensor + :param drop_prob: The probability to drop (set to zero) a single element + :param rescale: The default behavior of ``dropout`` during training is to rescale the output, + then it can be replaced by an :class:`~.Identity` during inference, default to True. + :return: The output tensor + + Examples: + + .. testcode:: + + import numpy as np + import megengine as mge + + import megengine.functional as F + from megengine import tensor + + data = tensor(np.ones(10, dtype=np.float32)) + out = F.dropout(data, 1./3.) + print(out.numpy()) + + Outputs: + + .. testoutput:: + :options: +SKIP + + [1.5 1.5 0. 1.5 1.5 1.5 1.5 1.5 1.5 1.5] + + """ + assert 0 <= drop_prob < 1 + rv = uniform(inp.shape) + mask = rv > drop_prob + inp *= mask.astype(inp.dtype) + if rescale: + inp *= 1 / (1 - drop_prob) + return inp + + +def identity(inp: Tensor) -> Tensor: + """applies an identity transform to the input tensor. + + :param inp: The input tensor + """ + op = builtin.Identity() + (data,) = utils.convert_inputs(inp) + (output,) = apply(op, data) + return output + + +def embedding( + input: Tensor, + weight: Tensor, + padding_idx: Optional[int] = None, + max_norm: Optional[float] = None, + norm_type: Optional[float] = None, +): + """ + Applies lookup table for embedding. + + :param input: the tensor with indices. + :param weight: the learnable weights which embedding from. + :param padding_idx: should be set to None, not support now. + :param max_norm: should be set to None, not support now. + :param norm_type: should be set to None, not support now. + + + Refer to :class:`~.Embedding` for more information. + """ + if padding_idx is not None: + raise ValueError("Not support padding_idx Now!") + if max_norm is not None or norm_type is not None: + raise ValueError("Not support weight normlization Now!") + + dest_shp = list(input.shape) + [weight.shape[-1]] + return weight[input.reshape(-1)].reshape(dest_shp) + + +def roi_pooling( + inp: Tensor, + rois: Tensor, + output_shape: Union[int, tuple, list], + mode: str = "max", + scale: float = 1.0, +) -> Tensor: + """ + Apply roi pooling on input feature + + :param inp: tensor that represents the input feature, (N, C, H, W) images + :param rois: (K, 5) boxes. First column is the index into N. The other 4 columns are xyxy + :param output_shape: (height, width) of output rois feature + :param mode: "max" or "average", use max/average align just like max/average pooling. Default: ``"max"`` + :param scale: scale the input boxes by this number. 
Default: 1.0
+    :return: (K, C, output_shape[0], output_shape[1]) feature of rois
+    """
+    assert mode in ["max", "average"], "only max/average mode is supported"
+    if isinstance(output_shape, int):
+        output_shape = (output_shape, output_shape)
+
+    op = builtin.ROIPooling(mode=mode, scale=scale)
+    result, _ = apply(
+        op, inp, rois, Tensor(output_shape, dtype="int32", device=inp.device)
+    )
+    return result
+
+
+def roi_align(
+    input: Tensor,
+    rois: Tensor,
+    output_shape: Union[int, tuple, list],
+    mode: str = "average",
+    spatial_scale: float = 1.0,
+    sample_points: Union[int, tuple, list] = 2,
+    aligned: bool = True,
+) -> Tensor:
+    """
+    Applies roi align on input feature
+
+    :param input: tensor that represents the input feature, (N, C, H, W) images
+    :param rois: (N, 5) boxes. First column is the index into N. The other 4 columns are xyxy
+    :param output_shape: (height, width) shape of output rois feature.
+    :param mode: "max" or "average", use max/average align just like max/average pooling. Default: ``"average"``
+    :param spatial_scale: scale the input boxes by this number. Default: 1.0
+    :param sample_points: number of input samples to take for each output sample.
+        0 to take samples densely. Default: 2
+    :param aligned: whether to align the input feature; with ``aligned=True``,
+        we first appropriately scale the ROI and then shift it by -0.5. Default: True
+    """
+    assert mode in ["max", "average"], "only max/average mode is supported"
+    if isinstance(output_shape, int):
+        output_shape = (output_shape, output_shape)
+    pooled_height, pooled_width = output_shape
+    if isinstance(sample_points, int):
+        sample_points = (sample_points, sample_points)
+    sample_height, sample_width = sample_points
+    offset = 0.5 if aligned else 0.0
+
+    op = builtin.ROIAlign(
+        mode=mode,
+        format="NCHW",
+        spatial_scale=spatial_scale,
+        offset=offset,
+        pooled_height=pooled_height,
+        pooled_width=pooled_width,
+        sample_height=sample_height,
+        sample_width=sample_width,
+    )
+    result, *_ = apply(op, input, rois)
+    return result
+
+
+def assert_equal(
+    get: Tensor, expect: Tensor, max_err: float = 1e-4, verbose: bool = False
+) -> Tensor:
+    r"""
+    Asserts that ``get`` equals ``expect``, and returns the value of ``expect``.
+
+    :param get: tensor to be checked.
+    :param expect: tensor with expected values.
+    :param max_err: tolerance that two float values are asserted equal. Default: 1e-4
+    :param verbose: whether to print details if two tensors are not equal. Default: False
+
+    Examples:
+
+    .. testcode::
+
+        import megengine.functional as F
+        from megengine import tensor
+
+        get = tensor([1.0, 2.0])
+        max_err = 0.1
+        expect = get + max_err / 2.0
+        val = F.assert_equal(get, expect, max_err=max_err)
+        print(val.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [1.05 2.05]
+
+    """
+    raise NotImplementedError
+    # op = builtin.AssertEqual(maxerr=max_err, verbose=verbose)
+    # result, = apply(op, get, expect)
+    # return result
+
+
+def indexing_one_hot(
+    src: Tensor, index: Tensor, axis: int = 1, keepdims=False
+) -> Tensor:
+    r"""
+    One-hot indexing for some axis.
+
+    :param src: input data tensor.
+    :param index: index tensor.
+    :param axis: the axis on ``src`` that ``index`` indexes along. Default: 1
+    :param keepdims: whether to keep the indexed axis in the result. Default: ``False``
+
+    Examples:
+
+    .. testcode::
+
+        import megengine.functional as F
+        from megengine import tensor
+
+        src = tensor([[1.0, 2.0]])
+        index = tensor([0])
+        val = F.indexing_one_hot(src, index)
+        print(val.numpy())
+
+    .. testoutput::
+
+        [1.]
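For a 2-D ``src`` with ``axis=1``, the op gathers one value per row, ``src[i, index[i]]``; a NumPy sketch of the same semantics (illustrative only, not the actual kernel):

```python
import numpy as np

src = np.array([[1.0, 2.0]])
index = np.array([0])
# pick src[i, index[i]] along axis 1, then drop that axis (keepdims=False)
out = np.take_along_axis(src, index[:, None], axis=1).squeeze(1)
print(out)  # [1.]
```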
+ + """ + op = builtin.IndexingOneHot(axis=axis) + (result,) = apply(op, src, index) + if not keepdims: + result = remove_axis(result, axis) + return result + + +def nms(boxes: Tensor, iou_thresh: float, scores: Optional[Tensor] = None) -> Tensor: + r""" + Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union (IoU). + + :param boxes: tensor of shape ``(N, 4)``; the boxes to perform nms on; each box is expected to be in (x1, y1, x2, y2) format. + :param iou_thresh: iou threshold for overlapping. + :param scores: tensor of shape ``(N,)``, the score of boxes. + :return: indices of the elements that have been kept by NMS. + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + x = np.zeros((100,4)) + np.random.seed(42) + x[:,:2] = np.random.rand(100,2)*20 + x[:,2:] = np.random.rand(100,2)*20 + 100 + scores = tensor(np.random.rand(100)) + inp = tensor(x) + result = F.nms(inp, iou_thresh=0.7, scores=scores) + print(result.numpy()) + + Outputs: + + .. testoutput:: + + [75 69] + + """ + assert ( + boxes.ndim == 2 and boxes.shape[1] == 4 + ), "the expected shape of boxes is (N, 4)" + + sorted_idx = None + if not scores is None: + assert scores.ndim == 1, "the expected shape of scores is (N,)" + sorted_idx = argsort(scores, descending=True) + boxes = boxes[sorted_idx] + max_output = boxes.shape[0] + + op = builtin.NMSKeep(iou_thresh, max_output) + inp = utils.convert_inputs(boxes.reshape(1, -1, 4)) + indices, count = apply(op, *inp) + indices = indices[0][: count.item()] + ret = sorted_idx[indices] if sorted_idx is not None else indices + return ret + + +def batched_nms( + boxes: Tensor, iou_thresh: float, idxs: Tensor, scores: Optional[Tensor] = None +) -> Tensor: + r""" + Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union (IoU). + + :param boxes: tensor of shape ``(N, 4)``; the boxes to perform nms on; each box is expected to be in (x1, y1, x2, y2) format + :param iou_thresh: iou threshold for overlapping + :param idxs: tensor of shape ``(N,)``, the class indexs of boxes in the batch. + :param scores: tensor of shape ``(N,)``, the score of boxes. + :return: indices and the number of the elements that have been kept by NMS + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + x = np.zeros((100,4)) + np.random.seed(42) + x[:,:2] = np.random.rand(100,2)*20 + x[:,2:] = np.random.rand(100,2)*20 + 100 + scores = tensor(np.random.rand(100)) + idxs = tensor(np.random.randint(0, 10, 100)) + inp = tensor(x) + result = F.batched_nms(inp, iou_thresh=0.6, idxs=idxs, scores=scores) + print(result.numpy()) + + Outputs: + + .. 
testoutput:: + + [75 41 99 98 69 64 11 27 35 18] + + """ + assert ( + boxes.ndim == 2 and boxes.shape[1] == 4 + ), "the expected shape of boxes is (N, 4)" + max_coordinate = boxes.max() + offsets = idxs.astype("float32") * (max_coordinate + 1) + boxes = boxes + offsets.reshape(-1, 1).broadcast(boxes.shape[0], 4) + + sorted_idx = None + if not scores is None: + assert scores.ndim == 1, "the expected shape of scores is (N,)" + sorted_idx = argsort(scores, descending=True) + boxes = boxes[sorted_idx] + max_output = boxes.shape[0] + + op = builtin.NMSKeep(iou_thresh, max_output) + inp = utils.convert_inputs(boxes.reshape(1, -1, 4)) + indices, count = apply(op, *inp) + indices = indices[0][: count.item()] + ret = sorted_idx[indices] if sorted_idx is not None else indices + return ret diff --git a/imperative/python/megengine/functional/quantized.py b/imperative/python/megengine/functional/quantized.py new file mode 100644 index 0000000000000000000000000000000000000000..9bee9311c05126b196de3c60eae55db604aa8d75 --- /dev/null +++ b/imperative/python/megengine/functional/quantized.py @@ -0,0 +1,83 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# pylint: disable=too-many-lines +from typing import Tuple, Union + +from ..core.ops import builtin +from ..core.tensor.core import apply +from ..tensor import Tensor +from .debug_param import get_conv_execution_strategy +from .types import _pair, _pair_nonzero + + +def conv_bias_activation( + inp: Tensor, + weight: Tensor, + bias: Tensor, + dtype=None, + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + format="NCHW", + nonlinear_mode="IDENTITY", + conv_mode="CROSS_CORRELATION", + compute_mode="DEFAULT", +) -> Tensor: + """ convolution bias with activation operation, only for inference. + + :param inp: The feature map of the convolution operation + :param weight: The convolution kernel + :param bias: The bias added to the result of convolution + :param stride: Stride of the 2D convolution operation. Default: 1 + :param padding: Size of the paddings added to the input on both sides of its + spatial dimensions. Only zero-padding is supported. Default: 0 + :param dilation: Dilation of the 2D convolution operation. Default: 1 + :param groups: number of groups to divide input and output channels into, + so as to perform a "grouped convolution". When ``groups`` is not 1, + ``in_channels`` and ``out_channels`` must be divisible by ``groups``, + and the shape of weight should be ``(groups, out_channel // groups, + in_channels // groups, height, width)``. + :type conv_mode: string or :class:`P.Convolution.Mode` + :param conv_mode: Supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default: + 'CROSS_CORRELATION'. + :param dtype: Support for np.dtype, Default: + np.int8. + :param scale: scale if use quantization, Default: + 0.0. + :param zero_point: scale if use quantization quint8, Default: + 0.0. + :type compute_mode: string or + :class:`P.Convolution.ComputeMode` + :param compute_mode: When set to 'DEFAULT', no special requirements will be + placed on the precision of intermediate results. 
When set to 'FLOAT32', + Float32 would be used for accumulator and intermediate result, but only + effective when input and output are of Float16 dtype. + + """ + ph, pw = _pair(padding) + sh, sw = _pair_nonzero(stride) + dh, dw = _pair_nonzero(dilation) + sparse_type = "DENSE" if groups == 1 else "GROUP" + op = builtin.ConvBiasForward( + stride_h=sh, + stride_w=sw, + pad_h=ph, + pad_w=pw, + dilate_h=dh, + dilate_w=dw, + dtype=dtype, + format=format, + strategy=get_conv_execution_strategy(), + nonlineMode=nonlinear_mode, + mode=conv_mode, + compute_mode=compute_mode, + sparse=sparse_type, + ) + (outputs,) = apply(op, inp, weight, bias) + return outputs diff --git a/imperative/python/megengine/functional/tensor.py b/imperative/python/megengine/functional/tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..15c26bf0aed389aad07d065d8c66e48c98b4524f --- /dev/null +++ b/imperative/python/megengine/functional/tensor.py @@ -0,0 +1,934 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import functools +import math +from itertools import accumulate +from typing import Iterable, List, Optional, Sequence, Tuple, Union + +import numpy as np + +from ..core._imperative_rt import CompNode +from ..core.ops import builtin +from ..core.ops._internal import param_defs as P +from ..core.ops.special import Const +from ..core.tensor.core import TensorBase, TensorWrapperBase, apply +from ..core.tensor.utils import ( + astensor1d, + convert_inputs, + convert_single_value, + dtype_promotion, + get_device, +) +from ..device import get_default_device +from ..tensor import Tensor +from .elemwise import ceil + +__all__ = [ + "add_axis", # expand_dims + "arange", + "broadcast", + "concat", + "cond_take", + "dimshuffle", # transpose, permute + "expand_dims", + "full", + "full_like", + "gather", + "eye", + "linspace", + "ones", + "ones_like", + "remove_axis", # squeeze + "split", + "squeeze", + "stack", + "reshape", + "scatter", + "where", + "zeros", + "zeros_like", + "param_pack_split", + "param_pack_concat", +] + + +def eye(n: int, *, dtype=None, device: Optional[CompNode] = None) -> Tensor: + """ + Returns a 2D tensor with ones on the diagonal and zeros elsewhere. + + :param n: The number of rows + :param m: The number of columns. Default: None + :param dtype: The data type. Default: None + :param device: Compute node of the matrix. Default: None + :param comp_graph: Compute graph of the matrix. Default: None + :return: The eye matrix + + Examples: + + .. testcode:: + + import numpy as np + import megengine.functional as F + + data_shape = (4, 6) + n, m = data_shape + out = F.eye(n, m, dtype=np.float32) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[1. 0. 0. 0. 0. 0.] + [0. 1. 0. 0. 0. 0.] + [0. 0. 1. 0. 0. 0.] + [0. 0. 0. 1. 0. 
0.]] + + """ + op = builtin.Eye(k=0, dtype=dtype, comp_node=device) + (result,) = apply(op, Tensor(n, dtype="int32", device=device)) + return result + + +def full(shape, value, dtype="float32", device=None): + if device is None: + device = get_default_device() + (x,) = Const(value, dtype=dtype, device=device)( + Tensor(value, dtype=dtype, device=device) + ) + return broadcast(x, shape) + + +def ones(shape, dtype="float32", device=None): + return full(shape, 1.0, dtype=dtype, device=device) + + +def zeros(shape, dtype="float32", device=None): + return full(shape, 0.0, dtype=dtype, device=device) + + +def zeros_like(inp: Tensor) -> Tensor: + r""" + Returns a zero tensor with the same shape as input tensor + + :param inp: input tensor + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + inp = tensor(np.arange(1, 7, dtype=np.int32).reshape(2,3)) + out = F.zeros_like(inp) + print(out.numpy()) + + .. testoutput:: + + [[0 0 0] + [0 0 0]] + + """ + return zeros(inp.shape, dtype=inp.dtype, device=inp.device) + + +def ones_like(inp: Tensor) -> Tensor: + r""" + Returns a identity tensor with the same shape as input tensor + """ + return ones(inp.shape, dtype=inp.dtype, device=inp.device) + + +def full_like(inp: Tensor, value: Union[int, float]) -> Tensor: + r""" + Returns a tensor filled with value val with the same shape as input tensor + """ + return full(inp.shape, value, dtype=inp.dtype, device=inp.device) + + +def broadcast(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor: + """ + Broadcast a tensor to ``shape`` + + :param inp: The input tensor + :param shape: The target shape + :return: The output tensor + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + data = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) + out = F.broadcast(data, (4, 2, 3)) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[[0. 1. 2.] + [3. 4. 5.]] + + [[0. 1. 2.] + [3. 4. 5.]] + + [[0. 1. 2.] + [3. 4. 5.]] + + [[0. 1. 2.] + [3. 4. 5.]]] + + """ + shape = astensor1d(shape, inp, dtype="int32", device=inp.device) + (result,) = apply(builtin.Broadcast(), inp, shape) + return result + + +def concat( + inps: Iterable[Tensor], axis: int = 0, device: Optional[CompNode] = None, +) -> Tensor: + r""" + Concat some tensors + + :param inps: Input tensors to concat + :param axis: the dimension over which the tensors are concatenated. Default: 0 + :param device: The comp node output on. Default: None + :param comp_graph: The graph in which output is. Default: None + :return: The output tensor + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + data1 = tensor(np.arange(0, 6, dtype=np.float32).reshape((2, 3))) + data2 = tensor(np.arange(6, 12, dtype=np.float32).reshape((2, 3))) + out = F.concat([data1, data2]) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[ 0. 1. 2.] + [ 3. 4. 5.] + [ 6. 7. 8.] + [ 9. 10. 11.]] + + """ + dtype = dtype_promotion(inps) + device = get_device(inps) + + def convert(x): + return convert_single_value(x, inps, dtype=dtype) + + inps = tuple(map(convert, inps)) + (result,) = apply(builtin.Concat(axis=axis, comp_node=device.to_c()), *inps) + return result + + +def stack(inps, axis=0): + """Concats a sequence of tensors along a new axis. + The input tensors must have the same shape. + + :param inps: The input tensors. + :param axis: Which axis will be concatenated. 
+ :return: The output concatenated tensor. + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + x1 = tensor(np.arange(0, 6, dtype=np.float32).reshape((2, 3))) + x2 = tensor(np.arange(6, 12, dtype=np.float32).reshape((2, 3))) + out = F.stack([x1, x2], axis=0) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[[ 0. 1. 2.] + [ 3. 4. 5.]] + + [[ 6. 7. 8.] + [ 9. 10. 11.]]] + + """ + shapes = {arr.shape for arr in inps} + if len(shapes) != 1: + raise ValueError("All input tensors must have the same shape") + + inps = [add_axis(inp, axis=axis) for inp in inps] + return concat(inps, axis=axis) + + +def split(inp, nsplits_or_sections, axis=0): + """Splits the input tensor into several smaller tensors. + When nsplits_or_sections is int, the last tensor may be smaller than others. + + :param inp: The input tensor. + :param nsplits_or_sections: Number of sub tensors or section information list. + :param axis: Which axis will be splited. + :return: The output tensor list. + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + x = tensor(np.random.random((2,3,4,5)), dtype=np.float32) + out = F.split(x, 2, axis=3) + print(out[0].shape, out[1].shape) + + Outputs: + + .. testoutput:: + + (2, 3, 4, 3) (2, 3, 4, 2) + + """ + sub_tensors = [] + sections = [] + + def swapaxis(inp, src, dst): + if src == dst: + return inp + shape = [i for i in range(len(inp.shape))] + shape[src] = dst + shape[dst] = src + return inp.transpose(shape) + + inp = swapaxis(inp, 0, axis) + + if isinstance(nsplits_or_sections, int): + incr_step = math.ceil(inp.shape[0] / nsplits_or_sections) + while incr_step < inp.shape[0]: + sections.append(incr_step) + incr_step += nsplits_or_sections + else: + sections = nsplits_or_sections + + st = 0 + for se in sections: + sub_tensors.append(swapaxis(inp[st:se], axis, 0)) + st = se + + if st < inp.shape[0]: + sub_tensors.append(swapaxis(inp[st:], axis, 0)) + + return sub_tensors + + +def _get_idx(index, axis): + index_dims = len(index.shape) + idx = [] + for i in range(index_dims): + if i != axis: + shape = [1] * index_dims + shape[i] = index.shape[i] + arange = linspace( + 0, index.shape[i] - 1, index.shape[i], device=index.device, + ) + arange = ( + arange.reshape(*shape) + .broadcast(index.shape) + .reshape(-1) + .astype(np.int32) + ) + idx.append(arange) + else: + idx.append(index.reshape(-1)) + return tuple(idx) + + +def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: + r""" + Gather data from :attr:`inp` on :attr:`axis` using :attr:`index`. + + For a 3-D tensor, the output is specified by:: + + out[i][j][k] = inp[index[i][j][k]][j][k] # if axis == 0 + out[i][j][k] = inp[i][index[i][j][k]][k] # if axis == 1 + out[i][j][k] = inp[i][j][index[i][j][k]] # if axis == 2 + + if :attr:`inp` is an n-dimensional tensor with size + :math:`(x_0,x_1,...,x_{i-1},x_i,x_{i+1},...,x_{n-1})` and axis=i, + then :attr:`index` must be an n-dimensional tensor with size + :math:`(x_0,x_1,...,x_{i-1},y,x_{i+1},...,x_{n-1})` where :math:`y\ge 1` and + output will have the same size as :attr:`index`. + + + :param inp: the source tensor + :param axis: the axis along which to index + :param index: the indices of elements to gather + + Examples: + + .. 
testcode:: + + import megengine.functional as F + from megengine import tensor + + inp = tensor([ + [1,2], [3,4], [5,6], + ]) + index = tensor([[0,2], [1,0]]) + oup = F.gather(inp, 0, index) + print(oup.numpy()) + + Outputs: + + .. testoutput:: + + [[1 6] + [3 2]] + + """ + input_shape = inp.shape + index_shape = index.shape + input_dims = len(input_shape) + index_dims = len(index_shape) + if input_dims != index_dims: + raise ValueError( + "The index tensor must have same dimensions as input tensor, " + "But the input dims:{}, the index dims:{}".format(input_dims, index_dims) + ) + + if axis < 0 or axis >= input_dims: + raise ValueError( + "Index axis {} is output of bounds, should in range [0 {})".format( + axis, input_dims + ) + ) + + for i in range(input_dims): + if i != axis and input_shape[i] != index_shape[i]: + raise ValueError( + "The input {} and index {} must have the same size apart from axis {}".format( + input_shape, index_shape, axis + ) + ) + + idx = _get_idx(index, axis) + return inp[idx].reshape(index.shape) # pylint: disable=no-member + + +def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: + r""" + Writes all values from the tensor :attr:`source` into :attr:`inp` at the indices specified in the :attr:`index` tensor. + + For each value in :attr:`source`, its output index is specified by its index + in :attr:`source` for ``axis != dimension`` and by the corresponding value in + :attr:`index` for ``axis = dimension``. + + For a 3-D tensor, :attr:`inp` is updated as:: + + inp[index[i][j][k]][j][k] = source[i][j][k] # if axis == 0 + inp[i][index[i][j][k]][k] = source[i][j][k] # if axis == 1 + inp[i][j][index[i][j][k]] = source[i][j][k] # if axis == 2 + + :attr:`inp`, :attr:`index` and :attr:`source` should have same number of dimensions. + + It is also required that ``source.shape(d) <= inp.shape(d)`` and ``index.shape(d) == source.shape(d)`` + for all dimensions ``d``. + + Moreover, the values of :attr:`index` must be between ``0`` and ``inp.shape(axis) - 1`` inclusive. + + .. note:: + Please notice that, due to performance issues, the result is uncertain on the GPU device + if scatter difference positions from source to the same destination position + regard to index tensor. + + Show the case using the following examples, the oup[0][2] is maybe + from source[0][2] which value is 0.2256 or source[1][2] which value is 0.5339 + if set the index[1][2] from 1 to 0. + + :param inp: the inp tensor which to be scattered + :param axis: the axis along which to index + :param index: the indices of elements to scatter + :param source: the source element(s) to scatter + + Examples: + + .. testcode:: + + import numpy as np + import megengine.functional as F + from megengine import tensor + + inp = tensor(np.zeros(shape=(3,5),dtype=np.float32)) + source = tensor([[0.9935,0.9465,0.2256,0.8926,0.4396],[0.7723,0.0718,0.5939,0.357,0.4576]]) + index = tensor([[0,2,0,2,1],[2,0,1,1,2]]) + oup = F.scatter(inp, 0, index,source) + print(oup.numpy()) + + Outputs: + + .. testoutput:: + + [[0.9935 0.0718 0.2256 0. 0. ] + [0. 0. 0.5939 0.357 0.4396] + [0.7723 0.9465 0. 
0.8926 0.4576]] + + """ + input_shape = inp.shape + index_shape = index.shape + source_shape = source.shape + input_dims = len(input_shape) + index_dims = len(index_shape) + source_dims = len(source_shape) + + if input_dims != index_dims or input_dims != source_dims: + raise ValueError("The input, source and index tensor must have same dimensions") + + if axis < 0 or axis >= input_dims: + raise ValueError( + "Index axis {} is output of bounds, should in range [0 {})".format( + axis, input_dims + ) + ) + + for i in range(source_dims): + if source_shape[i] > input_shape[i]: + raise ValueError( + "The each shape size for source {} must be less than or equal to input {} ".format( + source_shape, input_shape + ) + ) + + for i in range(index_dims): + if index_shape[i] != source_shape[i]: + raise ValueError( + "The each shape size for index {} must be equal to source {} ".format( + index_shape, source_shape + ) + ) + + for i in range(index_dims): + if i != axis and index_shape[i] > input_shape[i]: + raise ValueError( + "The index {} must be less than or equal to input {} size apart from axis {}".format( + index_shape, input_shape, axis + ) + ) + + idx = _get_idx(index, axis) + inp[idx] = source.flatten() + return inp + + +def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: + r""" + Select elements either from Tensor x or Tensor y, according to mask. + + .. math:: + + \textrm{out}_i = x_i \textrm{ if } \textrm{mask}_i \textrm{ is True else } y_i + + :param mask: a mask used for choosing x or y + :param x: the first choice + :param y: the second choice + + Examples: + + .. testcode:: + + from megengine import tensor + import megengine.functional as F + mask = tensor(np.array([[1, 0], [0, 1]], dtype=np.int32)) + x = tensor(np.array([[1, np.inf], [np.nan, 4]], + dtype=np.float32)) + y = tensor(np.array([[5, 6], [7, 8]], dtype=np.float32)) + out = F.where(mask, x, y) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[1. 6.] + [7. 4.]] + """ + raise NotImplementedError + # v0, index0 = mgb.opr.cond_take( + # x, mask, mode=P.CondTake.Mode.EQ, val=1 + # ) + # v1, index1 = mgb.opr.cond_take( + # y, mask, mode=P.CondTake.Mode.EQ, val=0 + # ) + # out = x.flatten() + # index = mgb.opr.concat(index0, index1, axis=0) + # v = mgb.opr.concat(v0, v1, axis=0) + # out = mgb.opr.set_advanced_indexing(out, v)[index] + # out = out.reshape(x.shape) + # return out + + +def cond_take(mask: Tensor, x: Tensor) -> Tensor: + r""" + Take elements from data if specific condition is satisfied on mask. This operator has two outputs: the first is the elements taken, and the second is the indices corresponding to those elements; they are both 1-dimensional. High-dimension input would first be flattened. + + :param mask: condition param; must be the same shape with data + :param x: input tensor from which to take elements + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + mask = tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + x = tensor(np.array([[1, np.inf], [np.nan, 4]], + dtype=np.float32)) + v, index = F.cond_take(mask, x) + print(v.numpy(), index.numpy()) + + Outputs: + + .. testoutput:: + + Tensor([1. 
4.]) Tensor([0 3], dtype=int32) + + """ + if not isinstance(x, (TensorWrapperBase, TensorBase)): + raise TypeError("input must be a tensor") + if not isinstance(mask, (TensorWrapperBase, TensorBase)): + raise TypeError("mask must be a tensor") + if mask.dtype != np.bool_: + raise ValueError("mask must be bool") + if x.device != mask.device: + raise ValueError("ambiguous device: {} vs {}".format(x.device, mask.device)) + + op = builtin.CondTake() + v, index = apply(op, x, mask) + return v, index + + +def dimshuffle(inp: Tensor, pattern: Iterable[int]) -> Tensor: + r""" + Swap shapes and strides according to given pattern + + :param inp: Input tensor + :param pattern: a list of integers including 0, 1, ... , ``ndim``-1, and any number of ``'x'`` char in dimensions where this tensor should be broadcasted. For examples: + + * (``'x'``) -> make a 0d (scalar) into a 1d vector + * (0, 1) -> identity for 2d vectors + * (1, 0) -> inverts the first and second dimensions + * (``'x'``, 0) -> make a row out of a 1d vector (N to 1xN) + * (0, ``'x'``) -> make a column out of a 1d vector (N to Nx1) + * (2, 0, 1) -> AxBxC to CxAxB + * (0, ``'x'``, 1) -> AxB to Ax1xB + * (1, ``'x'``, 0) -> AxB to Bx1xA + * (1,) -> This remove dimensions 0. It must be a broadcastable dimension (1xA to A) + + :return: The output tensor + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + x = tensor(np.array([[1, 1], [0, 0]], dtype=np.int32)) + out = F.dimshuffle(x, (1, 0)) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[1 0] + [1 0]] + + """ + op = builtin.Dimshuffle(pattern) + (inp,) = convert_inputs(inp) + (result,) = apply(op, inp) + return result + + +def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor: + r""" + Reshape a tensor to given target shape; total number of logical elements must + remain unchanged + + :param inp: Input tensor + :param target_shape: target shape, the components would be concatenated to form the + target shape, and it can contain an element of -1 representing unspec_axis. + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + x = tensor(np.arange(12, dtype=np.int32)) + out = F.reshape(x, (3, 2, 2)) + print(out.numpy()) + + Outputs: + + .. testoutput:: + + [[[ 0 1] + [ 2 3]] + + [[ 4 5] + [ 6 7]] + + [[ 8 9] + [10 11]]] + + """ + if isinstance(target_shape, (TensorBase, TensorWrapperBase)): + target_shape = target_shape.numpy() + target_shape = tuple(map(int, target_shape)) + unspec_axis = None + for i, s in enumerate(target_shape): + if s < 0: + if s != -1: + raise ValueError("expect shape[{}] >= -1, got {}".format(i, s)) + if unspec_axis is not None: + raise ValueError("multiple -1 in shape: {} & {}".format(unspec_axis, i)) + unspec_axis = i + + # TODO: device should be None (cpu) + (target_shape,) = Const(target_shape, dtype="int32", device=inp.device)(inp) + if unspec_axis is None: + op = builtin.Reshape() + else: + op = builtin.Reshape(unspec_axis=unspec_axis) + (x,) = apply(op, inp, target_shape) + return x + + +transpose = dimshuffle + + +AxisAddRemove = builtin.AxisAddRemove +AxisDesc = AxisAddRemove.AxisDesc + + +def add_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: + r""" + Add dimension before given axis. + + :param inp: Input tensor + :param axis: Place of new axes + :return: The output tensor + + Examples: + + .. 
testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+        x = tensor([1, 2])
+        out = F.add_axis(x, 0)
+        print(out.shape)
+
+    Outputs:
+
+    .. testoutput::
+
+        (1, 2)
+
+    """
+    Param = AxisAddRemove.Param
+
+    def get_axes():
+        try:
+            return [int(axis)]
+        except (TypeError, ValueError):
+            pass
+        return list(map(int, axis))
+
+    axis = get_axes()
+    ndim = inp.ndim + len(axis)
+    axis = sorted(i + ndim if i < 0 else i for i in axis)
+
+    param = Param(*map(AxisDesc.make_add, axis))
+    op = AxisAddRemove(param=param)
+    (result,) = apply(op, inp)
+    return result
+
+
+expand_dims = add_axis
+
+
+def remove_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
+    r"""
+    Removes dimensions of shape 1.
+
+    :param inp: Input tensor
+    :param axis: Place of axis to be removed
+    :return: The output tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        from megengine import tensor
+        import megengine.functional as F
+        x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1))
+        out = F.remove_axis(x, 3)
+        print(out.shape)
+
+    Outputs:
+
+    .. testoutput::
+
+        (1, 1, 2)
+
+    """
+    Param = AxisAddRemove.Param
+
+    def get_axes():
+        if axis is None:
+            return [i for i, s in enumerate(inp.shape) if s == 1]
+        try:
+            return [int(axis)]
+        except (TypeError, ValueError):
+            pass
+        return list(map(int, axis))
+
+    axis = get_axes()
+    axis = sorted(i + inp.ndim if i < 0 else i for i in axis)
+    axis = [a - i for i, a in enumerate(axis)]
+
+    param = Param(*map(AxisDesc.make_remove, axis))
+    op = AxisAddRemove(param=param)
+    (result,) = apply(op, inp)
+    return result
+
+
+squeeze = remove_axis
+
+
+def linspace(
+    start: Union[int, float, Tensor],
+    stop: Union[int, float, Tensor],
+    num: Union[int, Tensor],
+    dtype="float32",
+    device: Optional[CompNode] = None,
+) -> Tensor:
+    r"""
+    Returns equally spaced numbers over a specified interval
+
+    :param start: starting value of the sequence, should be scalar
+    :param stop: the last value of the sequence, should be scalar
+    :param num: number of values to generate
+    :param dtype: result data type
+    :return: The generated tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        import megengine.functional as F
+
+        a = F.linspace(3,10,5)
+        print(a.numpy())
+
+    .. testoutput::
+
+        [ 3.    4.75  6.5   8.25 10.  ]
+
+    """
+    start = Tensor(start, device=device)
+    stop = Tensor(stop, device=device)
+    num = Tensor(num, device=device)
+
+    device = device if device is None else device.to_c()
+    op = builtin.Linspace(comp_node=device)
+    (result,) = apply(op, start, stop, num)
+    if np.dtype(dtype) == np.int32:
+        return result.astype(dtype)
+    return result
+
+
+def arange(
+    start: Union[int, float, Tensor],
+    end: Union[int, float, Tensor],
+    step: Union[int, float, Tensor] = 1,
+    dtype="float32",
+    device: Optional[CompNode] = None,
+) -> Tensor:
+    r"""
+    Returns a Tensor with values from `start` to `end` with adjacent interval `step`
+
+    :param start: starting value of the sequence, should be scalar
+    :param end: ending value of the sequence, should be scalar
+    :param step: the gap between each pair of adjacent values. Default 1
+    :param dtype: result data type
+    :return: The generated tensor
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        import megengine.functional as F
+
+        a = F.arange(1, 5, 1)
+        print(a.numpy())
+
+    .. testoutput::
+
+        [1. 2. 3. 4.]
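The body below reduces ``arange`` to ``linspace``: it derives the element count with ``ceil((end - start) / step)`` and the matching stop value, then samples evenly. A plain-Python rendering of that arithmetic (a sketch of the reduction, not the tensor code itself):

```python
import math

def arange_via_linspace(start, end, step=1):
    # element count and final sample, as computed in arange() below
    num = math.ceil((end - start) / step)
    stop = start + step * (num - 1)
    if num <= 1:
        return [float(start)][:num]
    # linspace(start, stop, num) then yields start, start + step, ..., stop
    return [start + (stop - start) * i / (num - 1) for i in range(num)]

print(arange_via_linspace(1, 5))  # [1.0, 2.0, 3.0, 4.0]
```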
+ + """ + if isinstance(start, Tensor): + start = start.astype("float32") + if isinstance(end, Tensor): + end = end.astype("float32") + if isinstance(step, Tensor): + step = step.astype("float32") + num = ceil(Tensor((end - start) / step, device=device)) + stop = start + step * (num - 1) + result = linspace(start, stop, num, device=device) + if np.dtype(dtype) == np.int32: + return result.astype(dtype) + return result + + +def param_pack_split(inp: Tensor, offsets: List, shapes: List) -> Tensor: + op = builtin.ParamPackSplit() + op.offsets = offsets + op.shapes = shapes + return apply(op, inp) + + +def param_pack_concat(inps: List, offsets: Tensor, offsets_val: List) -> Tensor: + op = builtin.ParamPackConcat() + op.offsets = offsets_val + return apply(op, *inps, offsets)[0] diff --git a/imperative/python/megengine/functional/types.py b/imperative/python/megengine/functional/types.py new file mode 100644 index 0000000000000000000000000000000000000000..465ca03ce68f02d3944ddb87f5b0d4abde5ef9f9 --- /dev/null +++ b/imperative/python/megengine/functional/types.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import collections +import functools + + +def get_ndtuple(value, *, n, allow_zero=True): + r"""Converts possibly 1D tuple to nd tuple + + :type allow_zero: bool + :param allow_zero: whether to allow zero tuple value""" + if not isinstance(value, collections.Iterable): + value = int(value) + value = tuple([value for i in range(n)]) + else: + assert len(value) == n, "tuple len is not equal to n: {}".format(value) + spatial_axis = map(int, value) + value = tuple(spatial_axis) + if allow_zero: + minv = 0 + else: + minv = 1 + assert min(value) >= minv, "invalid value: {}".format(value) + return value + + +_single = functools.partial(get_ndtuple, n=1, allow_zero=True) +_pair = functools.partial(get_ndtuple, n=2, allow_zero=True) +_pair_nonzero = functools.partial(get_ndtuple, n=2, allow_zero=False) +_triple = functools.partial(get_ndtuple, n=3, allow_zero=True) +_quadruple = functools.partial(get_ndtuple, n=4, allow_zero=True) diff --git a/imperative/python/megengine/functional/utils.py b/imperative/python/megengine/functional/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..85c85d43a84c9cb6caf4e7009abac9414b42c8dc --- /dev/null +++ b/imperative/python/megengine/functional/utils.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+import collections +from typing import Iterable, Union + +import numpy as np + +from ..core.ops.builtin import Copy +from ..core.tensor import Tensor +from ..core.tensor.core import apply +from .math import topk as _topk +from .tensor import dimshuffle as _dimshuffle + + +def accuracy( + logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1 +) -> Union[Tensor, Iterable[Tensor]]: + r""" + Calculate the classification accuracy given predicted logits and ground-truth labels. + + :param logits: Model predictions of shape [batch_size, num_classes], + representing the probability (likelyhood) of each class. + :param target: Ground-truth labels, 1d tensor of int32 + :param topk: Specifies the topk values, could be an int or tuple of ints. Default: 1 + :return: Tensor(s) of classification accuracy between 0.0 and 1.0 + + Examples: + + .. testcode:: + + import numpy as np + from megengine import tensor + import megengine.functional as F + + logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10)) + target = tensor(np.arange(8, dtype=np.int32)) + top1, top5 = F.accuracy(logits, target, (1, 5)) + print(top1.numpy(), top5.numpy()) + + Outputs: + + .. testoutput:: + + [0.] [0.375] + """ + if isinstance(topk, int): + topk = (topk,) + _, pred = _topk(logits, k=max(topk), descending=True) + accs = [] + for k in topk: + correct = pred[:, :k].detach() == _dimshuffle(target, (0, "x")).broadcast( + target.shape[0], k + ) + accs.append(correct.astype(np.float32).sum() / target.shape[0]) + if len(topk) == 1: # type: ignore[arg-type] + accs = accs[0] + return accs + + +def zero_grad(inp: Tensor) -> Tensor: + r""" + Returns a tensor which is treated as constant during backward gradient calcuation, + i.e. its gradient is zero. + + :param inp: Input tensor. + + See implementation of :func:`~.softmax` for example. + """ + print("zero_grad is obsoleted, please use detach instead") + raise NotImplementedError + + +def copy(inp, cn): + return apply(Copy(comp_node=cn), inp)[0] diff --git a/imperative/python/megengine/hub/__init__.py b/imperative/python/megengine/hub/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f07c3979919e4f2bf2dd6efbbdb19dec7e3bb294 --- /dev/null +++ b/imperative/python/megengine/hub/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .hub import ( + help, + import_module, + list, + load, + load_serialized_obj_from_url, + pretrained, +) diff --git a/imperative/python/megengine/hub/const.py b/imperative/python/megengine/hub/const.py new file mode 100644 index 0000000000000000000000000000000000000000..5f53420bed8aa290f29ea958e3129c80e779a388 --- /dev/null +++ b/imperative/python/megengine/hub/const.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
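The constants below parameterize the hub cache location; ``_get_megengine_home`` in ``hub.py`` resolves it as ``$MGE_HOME`` if set, otherwise ``$XDG_CACHE_HOME/megengine``, otherwise ``~/.cache/megengine``. A standalone sketch of that lookup:

```python
import os

def resolve_cache_root():
    # MGE_HOME wins outright; otherwise fall back to the XDG cache dir,
    # defaulting to ~/.cache, with "megengine" appended
    xdg = os.getenv("XDG_CACHE_HOME", "~/.cache")
    return os.path.expanduser(os.getenv("MGE_HOME", os.path.join(xdg, "megengine")))

print(resolve_cache_root())  # e.g. /home/alice/.cache/megengine
```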
+DEFAULT_BRANCH_NAME = "master" +HUBCONF = "hubconf.py" +HUBDEPENDENCY = "dependencies" +DEFAULT_GIT_HOST = "github.com" +ENV_MGE_HOME = "MGE_HOME" +ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME" +DEFAULT_CACHE_DIR = "~/.cache" +DEFAULT_PROTOCOL = "HTTPS" +HTTP_READ_TIMEOUT = 120 diff --git a/imperative/python/megengine/hub/exceptions.py b/imperative/python/megengine/hub/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..aab0a13452736d4d14d6e36141bedee68b5b16b4 --- /dev/null +++ b/imperative/python/megengine/hub/exceptions.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +class FetcherError(Exception): + """Base class for fetch related error.""" + + +class InvalidRepo(FetcherError): + """The repo provided was somehow invalid.""" + + +class InvalidGitHost(FetcherError): + """The git host provided was somehow invalid.""" + + +class GitPullError(FetcherError): + """A git pull error occurred""" + + +class GitCheckoutError(FetcherError): + """A git checkout error occurred""" + + +class InvalidProtocol(FetcherError): + """The protocol provided was somehow invalid""" diff --git a/imperative/python/megengine/hub/fetcher.py b/imperative/python/megengine/hub/fetcher.py new file mode 100644 index 0000000000000000000000000000000000000000..4f60b3ceeed1409a8b7f04ac31436bf3654749c6 --- /dev/null +++ b/imperative/python/megengine/hub/fetcher.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
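The ``FetcherError`` hierarchy above gives callers a single base class to catch; a minimal defensive-use sketch (the repo name and entry point are hypothetical):

```python
from megengine import hub
from megengine.hub.exceptions import FetcherError, GitPullError

try:
    # hypothetical repo and entry point, fetched over the default protocol
    net = hub.load("some_owner/some_models", "resnet18_entry")
except GitPullError:
    # clone failed: check network access / SSH credentials, or retry
    raise
except FetcherError as exc:
    print("hub fetch failed:", exc)
```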
+import hashlib +import os +import re +import shutil +import subprocess +from tempfile import NamedTemporaryFile +from typing import Tuple +from zipfile import ZipFile + +import requests +from tqdm import tqdm + +from megengine.utils.http_download import ( + CHUNK_SIZE, + HTTP_CONNECTION_TIMEOUT, + HTTPDownloadError, +) + +from ..distributed import is_distributed, synchronized +from ..logger import get_logger +from .const import DEFAULT_BRANCH_NAME, HTTP_READ_TIMEOUT +from .exceptions import GitCheckoutError, GitPullError, InvalidGitHost, InvalidRepo +from .tools import cd + +logger = get_logger(__name__) + +HTTP_TIMEOUT = (HTTP_CONNECTION_TIMEOUT, HTTP_READ_TIMEOUT) + +pattern = re.compile( + r"^(?:[a-z0-9]" # First character of the domain + r"(?:[a-z0-9-_]{0,61}[a-z0-9])?\.)" # Sub domain + hostname + r"+[a-z0-9][a-z0-9-_]{0,61}" # First 61 characters of the gTLD + r"[a-z]$" # Last character of the gTLD +) + + +class RepoFetcherBase: + @classmethod + def fetch( + cls, + git_host: str, + repo_info: str, + use_cache: bool = False, + commit: str = None, + silent: bool = True, + ) -> str: + raise NotImplementedError() + + @classmethod + def _parse_repo_info(cls, repo_info: str) -> Tuple[str, str, str]: + try: + branch_info = DEFAULT_BRANCH_NAME + if ":" in repo_info: + prefix_info, branch_info = repo_info.split(":") + else: + prefix_info = repo_info + repo_owner, repo_name = prefix_info.split("/") + return repo_owner, repo_name, branch_info + except ValueError: + raise InvalidRepo("repo_info: '{}' is invalid.".format(repo_info)) + + @classmethod + def _check_git_host(cls, git_host): + return cls._is_valid_domain(git_host) or cls._is_valid_host(git_host) + + @classmethod + def _is_valid_domain(cls, s): + try: + return pattern.match(s.encode("idna").decode("ascii")) + except UnicodeError: + return False + + @classmethod + def _is_valid_host(cls, s): + nums = s.split(".") + if len(nums) != 4 or any(not _.isdigit() for _ in nums): + return False + return all(0 <= int(_) < 256 for _ in nums) + + @classmethod + def _gen_repo_dir(cls, repo_dir: str) -> str: + return hashlib.sha1(repo_dir.encode()).hexdigest()[:16] + + +class GitSSHFetcher(RepoFetcherBase): + @classmethod + @synchronized + def fetch( + cls, + git_host: str, + repo_info: str, + use_cache: bool = False, + commit: str = None, + silent: bool = True, + ) -> str: + """ + Fetches git repo by SSH protocol + + :param git_host: + host address of git repo. + example: github.com + :param repo_info: + a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional + tag/branch. The default branch is ``master`` if not specified. 
+ example: ``"brain_sdk/MegBrain[:hub]"`` + :param use_cache: + whether to use locally fetched code or completely re-fetch + :param commit: + commit id on github or gitlab + :param silent: + whether to accept the stdout and stderr of the subprocess with PIPE, instead of + displaying on the screen + :return: + directory where the repo code is stored + """ + if not cls._check_git_host(git_host): + raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host)) + + repo_owner, repo_name, branch_info = cls._parse_repo_info(repo_info) + normalized_branch_info = branch_info.replace("/", "_") + repo_dir_raw = "{}_{}_{}".format( + repo_owner, repo_name, normalized_branch_info + ) + ("_{}".format(commit) if commit else "") + repo_dir = cls._gen_repo_dir(repo_dir_raw) + git_url = "git@{}:{}/{}.git".format(git_host, repo_owner, repo_name) + + if use_cache and os.path.exists(repo_dir): # use cache + logger.debug("Cache Found in %s", repo_dir) + return repo_dir + + if is_distributed(): + logger.warning( + "When using `hub.load` or `hub.list` to fetch git repositories\n" + " in DISTRIBUTED mode for the first time, processes are synchronized to\n" + " ensure that target repository is ready to use for each process.\n" + " Users are expected to see this warning no more than ONCE, otherwise\n" + " (very little chance) you may need to remove corrupt cache\n" + " `%s` and fetch again.", + repo_dir, + ) + + shutil.rmtree(repo_dir, ignore_errors=True) # ignore and clear cache + + logger.debug( + "Git Clone from Repo:%s Branch: %s to %s", + git_url, + normalized_branch_info, + repo_dir, + ) + + kwargs = ( + {"stderr": subprocess.PIPE, "stdout": subprocess.PIPE} if silent else {} + ) + if commit is None: + # shallow clone repo by branch/tag + p = subprocess.Popen( + [ + "git", + "clone", + "-b", + normalized_branch_info, + git_url, + repo_dir, + "--depth=1", + ], + **kwargs, + ) + cls._check_clone_pipe(p) + else: + # clone repo and checkout to commit_id + p = subprocess.Popen(["git", "clone", git_url, repo_dir], **kwargs) + cls._check_clone_pipe(p) + + with cd(repo_dir): + logger.debug("git checkout to %s", commit) + p = subprocess.Popen(["git", "checkout", commit], **kwargs) + _, err = p.communicate() + if p.returncode: + shutil.rmtree(repo_dir, ignore_errors=True) + raise GitCheckoutError( + "Git checkout error, please check the commit id.\n" + + err.decode() + ) + with cd(repo_dir): + shutil.rmtree(".git") + + return repo_dir + + @classmethod + def _check_clone_pipe(cls, p): + _, err = p.communicate() + if p.returncode: + raise GitPullError( + "Repo pull error, please check repo info.\n" + err.decode() + ) + + +class GitHTTPSFetcher(RepoFetcherBase): + @classmethod + @synchronized + def fetch( + cls, + git_host: str, + repo_info: str, + use_cache: bool = False, + commit: str = None, + silent: bool = True, + ) -> str: + """ + Fetches git repo by HTTPS protocol + + :param git_host: + host address of git repo + example: github.com + :param repo_info: + a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional + tag/branch. The default branch is ``master`` if not specified. 
+            example: ``"brain_sdk/MegBrain[:hub]"``
+        :param use_cache:
+            whether to use locally cached code or completely re-fetch
+        :param commit:
+            commit id on github or gitlab
+        :param silent:
+            whether to accept the stdout and stderr of the subprocess with PIPE, instead of
+            displaying on the screen
+        :return:
+            directory where the repo code is stored
+        """
+        if not cls._check_git_host(git_host):
+            raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host))
+
+        repo_owner, repo_name, branch_info = cls._parse_repo_info(repo_info)
+        normalized_branch_info = branch_info.replace("/", "_")
+        repo_dir_raw = "{}_{}_{}".format(
+            repo_owner, repo_name, normalized_branch_info
+        ) + ("_{}".format(commit) if commit else "")
+        repo_dir = cls._gen_repo_dir(repo_dir_raw)
+        archive_url = cls._git_archive_link(
+            git_host, repo_owner, repo_name, branch_info, commit
+        )
+
+        if use_cache and os.path.exists(repo_dir):  # use cache
+            logger.debug("Cache Found in %s", repo_dir)
+            return repo_dir
+
+        if is_distributed():
+            logger.warning(
+                "When using `hub.load` or `hub.list` to fetch git repositories "
+                "in DISTRIBUTED mode for the first time, processes are synchronized to "
+                "ensure that target repository is ready to use for each process.\n"
+                "Users are expected to see this warning no more than ONCE, otherwise "
+                "(very little chance) you may need to remove corrupt hub cache %s and fetch again.",
+                repo_dir,
+            )
+
+        shutil.rmtree(repo_dir, ignore_errors=True)  # ignore and clear cache
+
+        logger.debug("Downloading from %s to %s", archive_url, repo_dir)
+        cls._download_zip_and_extract(archive_url, repo_dir)
+
+        return repo_dir
+
+    @classmethod
+    def _download_zip_and_extract(cls, url, target_dir):
+        resp = requests.get(url, timeout=HTTP_TIMEOUT, stream=True)
+        if resp.status_code != 200:
+            raise HTTPDownloadError(
+                "An error occurred when downloading from {}".format(url)
+            )
+
+        total_size = int(resp.headers.get("Content-Length", 0))
+        _bar = tqdm(total=total_size, unit="iB", unit_scale=True)
+
+        with NamedTemporaryFile("w+b") as f:
+            for chunk in resp.iter_content(CHUNK_SIZE):
+                if not chunk:
+                    break
+                _bar.update(len(chunk))
+                f.write(chunk)
+            _bar.close()
+            f.seek(0)
+            with ZipFile(f) as temp_zip_f:
+                zip_dir_name = temp_zip_f.namelist()[0].split("/")[0]
+                temp_zip_f.extractall(".")
+            shutil.move(zip_dir_name, target_dir)
+
+    @classmethod
+    def _git_archive_link(cls, git_host, repo_owner, repo_name, branch_info, commit):
+        archive_link = "https://{}/{}/{}/archive/{}.zip".format(
+            git_host, repo_owner, repo_name, commit or branch_info
+        )
+
+        return archive_link
diff --git a/imperative/python/megengine/hub/hub.py b/imperative/python/megengine/hub/hub.py
new file mode 100644
index 0000000000000000000000000000000000000000..139256e9f111184ca798bb136527e841620142bf
--- /dev/null
+++ b/imperative/python/megengine/hub/hub.py
@@ -0,0 +1,333 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
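A quick aside before hub.py: the cache-directory and archive-URL derivation in the fetchers above is easy to get wrong when debugging cache misses. The following standalone sketch (a hypothetical helper, not part of this diff) mirrors `_parse_repo_info`, `_gen_repo_dir`, and `_git_archive_link`, assuming the default branch is ``master`` as the docstrings state:

```python
import hashlib

def archive_url_and_cache_dir(git_host, repo_info, commit=None):
    # Mirrors RepoFetcherBase._parse_repo_info: "owner/name[:branch]",
    # defaulting to the "master" branch when no tag/branch is given.
    prefix, _, branch = repo_info.partition(":")
    branch = branch or "master"
    owner, name = prefix.split("/")
    # Mirrors the repo_dir derivation used by both fetchers:
    # sha1("{owner}_{name}_{branch}[_{commit}]") truncated to 16 hex chars.
    raw = "{}_{}_{}".format(owner, name, branch.replace("/", "_"))
    if commit:
        raw += "_{}".format(commit)
    cache_dir = hashlib.sha1(raw.encode()).hexdigest()[:16]
    # Mirrors GitHTTPSFetcher._git_archive_link: an explicit commit wins over the branch.
    url = "https://{}/{}/{}/archive/{}.zip".format(git_host, owner, name, commit or branch)
    return url, cache_dir

print(archive_url_and_cache_dir("github.com", "brain_sdk/MegBrain:hub"))
# ('https://github.com/brain_sdk/MegBrain/archive/hub.zip', '<16-char sha1 prefix>')
```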
+import functools +import hashlib +import os +import sys +import types +from typing import Any, List +from urllib.parse import urlparse + +from megengine.utils.http_download import download_from_url + +from ..distributed import is_distributed +from ..logger import get_logger +from ..serialization import load as _mge_load_serialized +from .const import ( + DEFAULT_CACHE_DIR, + DEFAULT_GIT_HOST, + DEFAULT_PROTOCOL, + ENV_MGE_HOME, + ENV_XDG_CACHE_HOME, + HTTP_READ_TIMEOUT, + HUBCONF, + HUBDEPENDENCY, +) +from .exceptions import InvalidProtocol +from .fetcher import GitHTTPSFetcher, GitSSHFetcher +from .tools import cd, check_module_exists, load_module + +logger = get_logger(__name__) + + +PROTOCOLS = { + "HTTPS": GitHTTPSFetcher, + "SSH": GitSSHFetcher, +} + + +def _get_megengine_home() -> str: + """MGE_HOME setting complies with the XDG Base Directory Specification + """ + megengine_home = os.path.expanduser( + os.getenv( + ENV_MGE_HOME, + os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "megengine"), + ) + ) + return megengine_home + + +def _get_repo( + git_host: str, + repo_info: str, + use_cache: bool = False, + commit: str = None, + protocol: str = DEFAULT_PROTOCOL, +) -> str: + if protocol not in PROTOCOLS: + raise InvalidProtocol( + "Invalid protocol, the value should be one of {}.".format( + ", ".join(PROTOCOLS.keys()) + ) + ) + cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub")) + with cd(cache_dir): + fetcher = PROTOCOLS[protocol] + repo_dir = fetcher.fetch(git_host, repo_info, use_cache, commit) + return os.path.join(cache_dir, repo_dir) + + +def _check_dependencies(module: types.ModuleType) -> None: + if not hasattr(module, HUBDEPENDENCY): + return + + dependencies = getattr(module, HUBDEPENDENCY) + if not dependencies: + return + + missing_deps = [m for m in dependencies if not check_module_exists(m)] + if len(missing_deps): + raise RuntimeError("Missing dependencies: {}".format(", ".join(missing_deps))) + + +def _init_hub( + repo_info: str, + git_host: str, + use_cache: bool = True, + commit: str = None, + protocol: str = DEFAULT_PROTOCOL, +): + """Imports hubmodule like python import + + :param repo_info: + a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional + tag/branch. The default branch is ``master`` if not specified. + Example: ``"brain_sdk/MegBrain[:hub]"`` + :param git_host: + host address of git repo + Example: github.com + :param use_cache: + whether to use locally cached code or completely re-fetch + :param commit: + commit id on github or gitlab + :param protocol: + which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. + The value should be one of HTTPS, SSH. 
+ :return: + hubconf.py as a python module + """ + cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub")) + os.makedirs(cache_dir, exist_ok=True) + absolute_repo_dir = _get_repo( + git_host, repo_info, use_cache=use_cache, commit=commit, protocol=protocol + ) + sys.path.insert(0, absolute_repo_dir) + hubmodule = load_module(HUBCONF, os.path.join(absolute_repo_dir, HUBCONF)) + sys.path.remove(absolute_repo_dir) + + return hubmodule + + +@functools.wraps(_init_hub) +def import_module(*args, **kwargs): + return _init_hub(*args, **kwargs) + + +def list( + repo_info: str, + git_host: str = DEFAULT_GIT_HOST, + use_cache: bool = True, + commit: str = None, + protocol: str = DEFAULT_PROTOCOL, +) -> List[str]: + """Lists all entrypoints available in repo hubconf + + :param repo_info: + a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional + tag/branch. The default branch is ``master`` if not specified. + Example: ``"brain_sdk/MegBrain[:hub]"`` + :param git_host: + host address of git repo + Example: github.com + :param use_cache: + whether to use locally cached code or completely re-fetch + :param commit: + commit id on github or gitlab + :param protocol: + which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. + The value should be one of HTTPS, SSH. + :return: + all entrypoint names of the model + """ + hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) + + return [ + _ + for _ in dir(hubmodule) + if not _.startswith("__") and callable(getattr(hubmodule, _)) + ] + + +def load( + repo_info: str, + entry: str, + *args, + git_host: str = DEFAULT_GIT_HOST, + use_cache: bool = True, + commit: str = None, + protocol: str = DEFAULT_PROTOCOL, + **kwargs +) -> Any: + """Loads model from github or gitlab repo, with pretrained weights. + + :param repo_info: + a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional + tag/branch. The default branch is ``master`` if not specified. + Example: ``"brain_sdk/MegBrain[:hub]"`` + :param entry: + an entrypoint defined in hubconf + :param git_host: + host address of git repo + Example: github.com + :param use_cache: + whether to use locally cached code or completely re-fetch + :param commit: + commit id on github or gitlab + :param protocol: + which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. + The value should be one of HTTPS, SSH. + :return: + a single model with corresponding pretrained weights. + """ + hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) + + if not hasattr(hubmodule, entry) or not callable(getattr(hubmodule, entry)): + raise RuntimeError("Cannot find callable {} in hubconf.py".format(entry)) + + _check_dependencies(hubmodule) + + module = getattr(hubmodule, entry)(*args, **kwargs) + return module + + +def help( + repo_info: str, + entry: str, + git_host: str = DEFAULT_GIT_HOST, + use_cache: bool = True, + commit: str = None, + protocol: str = DEFAULT_PROTOCOL, +) -> str: + """This function returns docstring of entrypoint ``entry`` by following steps: + + 1. Pull the repo code specified by git and repo_info + 2. Load the entry defined in repo's hubconf.py + 3. Return docstring of function entry + + :param repo_info: + a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional + tag/branch. The default branch is ``master`` if not specified. 
+ Example: ``"brain_sdk/MegBrain[:hub]"`` + :param entry: + an entrypoint defined in hubconf.py + :param git_host: + host address of git repo + Example: github.com + :param use_cache: + whether to use locally cached code or completely re-fetch + :param commit: + commit id on github or gitlab + :param protocol: + which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. + The value should be one of HTTPS, SSH. + :return: + docstring of entrypoint ``entry`` + """ + hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) + + if not hasattr(hubmodule, entry) or not callable(getattr(hubmodule, entry)): + raise RuntimeError("Cannot find callable {} in hubconf.py".format(entry)) + + doc = getattr(hubmodule, entry).__doc__ + return doc + + +def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: + """Loads MegEngine serialized object from the given URL. + + If the object is already present in ``model_dir``, it's deserialized and + returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. + + :param url: url to serialized object + :param model_dir: dir to cache target serialized file + + :return: loaded object + """ + if model_dir is None: + model_dir = os.path.join(_get_megengine_home(), "serialized") + os.makedirs(model_dir, exist_ok=True) + + parts = urlparse(url) + filename = os.path.basename(parts.path) + + # use hash as prefix to avoid filename conflict from different urls + sha256 = hashlib.sha256() + sha256.update(url.encode()) + digest = sha256.hexdigest()[:6] + filename = digest + "_" + filename + + cached_file = os.path.join(model_dir, filename) + logger.info( + "load_serialized_obj_from_url: download to or using cached %s", cached_file + ) + if not os.path.exists(cached_file): + if is_distributed(): + logger.warning( + "Downloading serialized object in DISTRIBUTED mode\n" + " File may be downloaded multiple times. We recommend\n" + " users to download in single process first." + ) + download_from_url(url, cached_file, HTTP_READ_TIMEOUT) + + state_dict = _mge_load_serialized(cached_file) + return state_dict + + +class pretrained: + r""" + Decorator which helps to download pretrained weights from the given url. + + For example, we can decorate a resnet18 function as follows + + .. code-block:: + + @hub.pretrained("https://url/to/pretrained_resnet18.pkl") + def resnet18(**kwargs): + return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + + When decorated function is called with ``pretrained=True``, MegEngine will automatically + download and fill the returned model with pretrained weights. + """ + + def __init__(self, url): + self.url = url + + def __call__(self, func): + @functools.wraps(func) + def pretrained_model_func( + pretrained=False, **kwargs + ): # pylint: disable=redefined-outer-name + model = func(**kwargs) + if pretrained: + weights = load_serialized_obj_from_url(self.url) + model.load_state_dict(weights) + return model + + return pretrained_model_func + + +__all__ = [ + "list", + "load", + "help", + "load_serialized_obj_from_url", + "pretrained", + "import_module", +] diff --git a/imperative/python/megengine/hub/tools.py b/imperative/python/megengine/hub/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..0bf9c98c7b3cdc8958cd85a149df5a9158f83471 --- /dev/null +++ b/imperative/python/megengine/hub/tools.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. 
All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import importlib.util
+import os
+import types
+from contextlib import contextmanager
+from typing import Iterator
+
+
+def load_module(name: str, path: str) -> types.ModuleType:
+    """
+    Loads module specified by name and path
+
+    :param name: module name
+    :param path: module path
+    """
+    spec = importlib.util.spec_from_file_location(name, path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def check_module_exists(module: str) -> bool:
+    """Checks whether python module exists or not
+
+    :param module: name of module
+    """
+    return importlib.util.find_spec(module) is not None
+
+
+@contextmanager
+def cd(target: str) -> Iterator[None]:
+    """Changes current directory to target
+
+    :param target: target directory
+    """
+    prev = os.getcwd()
+    os.chdir(os.path.expanduser(target))
+    try:
+        yield
+    finally:
+        os.chdir(prev)
diff --git a/imperative/python/megengine/logger.py b/imperative/python/megengine/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e926ca3be8f9feb33fe93724e13d6432b0d2cda
--- /dev/null
+++ b/imperative/python/megengine/logger.py
@@ -0,0 +1,237 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import contextlib
+import logging
+import os
+import sys
+
+_all_loggers = []
+_default_level_name = os.getenv("MEGENGINE_LOGGING_LEVEL", "ERROR")
+_default_level = logging.getLevelName(_default_level_name.upper())
+
+
+def set_log_file(fout, mode="a"):
+    r"""Sets log output file.
+
+    :type fout: str or file-like
+    :param fout: file-like object that supports write and flush, or string for
+        the filename
+    :type mode: str
+    :param mode: specify the mode to open log file if *fout* is a string
+    """
+    if isinstance(fout, str):
+        fout = open(fout, mode)
+    MegEngineLogFormatter.log_fout = fout
+
+
+class MegEngineLogFormatter(logging.Formatter):
+    log_fout = None
+    date_full = "[%(asctime)s %(lineno)d@%(filename)s:%(name)s] "
+    date = "%(asctime)s "
+    msg = "%(message)s"
+    max_lines = 256
+
+    def _color_exc(self, msg):
+        r"""Sets the color of message as the exception type.
+        """
+        return "\x1b[34m{}\x1b[0m".format(msg)
+
+    def _color_dbg(self, msg):
+        r"""Sets the color of message as the debugging type.
+        """
+        return "\x1b[36m{}\x1b[0m".format(msg)
+
+    def _color_warn(self, msg):
+        r"""Sets the color of message as the warning type.
+        """
+        return "\x1b[1;31m{}\x1b[0m".format(msg)
+
+    def _color_err(self, msg):
+        r"""Sets the color of message as the error type.
+        """
+        return "\x1b[1;4;31m{}\x1b[0m".format(msg)
+
+    def _color_omitted(self, msg):
+        r"""Sets the color of message as the omitted type.
+        """
+        return "\x1b[35m{}\x1b[0m".format(msg)
+
+    def _color_normal(self, msg):
+        r"""Sets the color of message as the normal type.
+        """
+        return msg
+
+    def _color_date(self, msg):
+        r"""Sets the color of message the same as the date.
+ """ + return "\x1b[32m{}\x1b[0m".format(msg) + + def format(self, record): + if record.levelno == logging.DEBUG: + mcl, mtxt = self._color_dbg, "DBG" + elif record.levelno == logging.WARNING: + mcl, mtxt = self._color_warn, "WRN" + elif record.levelno == logging.ERROR: + mcl, mtxt = self._color_err, "ERR" + else: + mcl, mtxt = self._color_normal, "" + + if mtxt: + mtxt += " " + + if self.log_fout: + self.__set_fmt(self.date_full + mtxt + self.msg) + formatted = super(MegEngineLogFormatter, self).format(record) + nr_line = formatted.count("\n") + 1 + if nr_line >= self.max_lines: + head, body = formatted.split("\n", 1) + formatted = "\n".join( + [ + head, + "BEGIN_LONG_LOG_{}_LINES{{".format(nr_line - 1), + body, + "}}END_LONG_LOG_{}_LINES".format(nr_line - 1), + ] + ) + self.log_fout.write(formatted) + self.log_fout.write("\n") + self.log_fout.flush() + + self.__set_fmt(self._color_date(self.date) + mcl(mtxt + self.msg)) + formatted = super(MegEngineLogFormatter, self).format(record) + + if record.exc_text or record.exc_info: + # handle exception format + b = formatted.find("Traceback ") + if b != -1: + s = formatted[b:] + s = self._color_exc(" " + s.replace("\n", "\n ")) + formatted = formatted[:b] + s + + nr_line = formatted.count("\n") + 1 + if nr_line >= self.max_lines: + lines = formatted.split("\n") + remain = self.max_lines // 2 + removed = len(lines) - remain * 2 + if removed > 0: + mid_msg = self._color_omitted( + "[{} log lines omitted (would be written to output file " + "if set_log_file() has been called;\n" + " the threshold can be set at " + "MegEngineLogFormatter.max_lines)]".format(removed) + ) + formatted = "\n".join(lines[:remain] + [mid_msg] + lines[-remain:]) + + return formatted + + if sys.version_info.major < 3: + + def __set_fmt(self, fmt): + self._fmt = fmt + + else: + + def __set_fmt(self, fmt): + self._style._fmt = fmt + + +def get_logger(name=None, formatter=MegEngineLogFormatter): + r"""Gets megengine logger with given name. + """ + + logger = logging.getLogger(name) + if getattr(logger, "_init_done__", None): + return logger + logger._init_done__ = True + logger.propagate = False + logger.setLevel(_default_level) + handler = logging.StreamHandler() + handler.setFormatter(formatter(datefmt="%d %H:%M:%S")) + handler.setLevel(0) + del logger.handlers[:] + logger.addHandler(handler) + _all_loggers.append(logger) + return logger + + +def set_log_level(level, update_existing=True): + """Sets default logging level. + + :type level: int e.g. 
logging.INFO
+    :param level: logging level given by python :mod:`logging` module
+    :param update_existing: whether to update existing loggers
+    """
+    global _default_level  # pylint: disable=global-statement
+    _default_level = level
+    if update_existing:
+        for i in _all_loggers:
+            i.setLevel(level)
+
+
+_logger = get_logger(__name__)
+
+try:
+    if sys.version_info.major < 3:
+        raise ImportError()
+
+    from .core._imperative_rt.utils import Logger as _imperative_rt_logger
+
+    class MegBrainLogFormatter(MegEngineLogFormatter):
+        date = "%(asctime)s[mgb] "
+
+        def _color_date(self, msg):
+            return "\x1b[33m{}\x1b[0m".format(msg)
+
+    _megbrain_logger = get_logger("megbrain", MegBrainLogFormatter)
+    _imperative_rt_logger.set_log_handler(_megbrain_logger)
+    if _default_level == logging.getLevelName("ERROR"):
+        _imperative_rt_logger.set_log_level(_imperative_rt_logger.LogLevel.Error)
+    elif _default_level == logging.getLevelName("INFO"):
+        _imperative_rt_logger.set_log_level(_imperative_rt_logger.LogLevel.Info)
+    else:
+        _imperative_rt_logger.set_log_level(_imperative_rt_logger.LogLevel.Debug)
+
+    def set_mgb_log_level(level):
+        r"""Sets megbrain log level.
+
+        :type level: int e.g. logging.INFO
+        :param level: new log level
+        :return: original log level
+        """
+        logger = _megbrain_logger
+        rst = logger.getEffectiveLevel()
+        logger.setLevel(level)
+        return rst
+
+
+except ImportError as exc:
+
+    def set_mgb_log_level(level):
+        raise NotImplementedError("imperative_rt has not been imported")
+
+
+@contextlib.contextmanager
+def replace_mgb_log_level(level):
+    r"""Replaces megbrain log level in a block and restores it after exiting.
+
+    :type level: int e.g. logging.INFO
+    :param level: new log level
+    """
+    old = set_mgb_log_level(level)
+    try:
+        yield
+    finally:
+        set_mgb_log_level(old)
+
+
+def enable_debug_log():
+    r"""Sets logging level to debug for all components.
+    """
+    set_log_level(logging.DEBUG)
+    set_mgb_log_level(logging.DEBUG)
diff --git a/imperative/python/megengine/module/__init__.py b/imperative/python/megengine/module/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a10228f20eae774f3a09ab0288d39787963d581f
--- /dev/null
+++ b/imperative/python/megengine/module/__init__.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
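A short usage sketch of the logger module above (not part of the diff; it assumes `megengine` is importable, and the last block further assumes the `_imperative_rt` extension built earlier in this diff is present, since `set_mgb_log_level` raises `NotImplementedError` otherwise):

```python
import logging

from megengine.logger import (
    get_logger,
    replace_mgb_log_level,
    set_log_file,
    set_log_level,
)

logger = get_logger("my_experiment")  # colored stderr handler; level defaults to
                                      # MEGENGINE_LOGGING_LEVEL (ERROR if unset)
set_log_file("train.log")             # records are mirrored to this file; entries
                                      # longer than max_lines get BEGIN/END markers
set_log_level(logging.INFO)           # also updates loggers created earlier

logger.info("training started")

with replace_mgb_log_level(logging.DEBUG):
    pass  # the megbrain (C++) side logs at DEBUG only inside this block
```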
+ +from .activation import LeakyReLU, PReLU, ReLU, Sigmoid, Softmax +from .batchnorm import BatchNorm1d, BatchNorm2d, SyncBatchNorm +from .concat import Concat +from .conv import Conv2d, ConvRelu2d, ConvTranspose2d, LocalConv2d +from .conv_bn import ConvBn2d, ConvBnRelu2d +from .dropout import Dropout +from .elemwise import Elemwise +from .embedding import Embedding +from .identity import Identity +from .linear import Linear +from .module import Module +from .parampack import ParamPack +from .pooling import AvgPool2d, MaxPool2d +from .quant_dequant import DequantStub, QuantStub +from .sequential import Sequential diff --git a/imperative/python/megengine/module/activation.py b/imperative/python/megengine/module/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..025844ed03ac6803d438f3686537e5f88e98641e --- /dev/null +++ b/imperative/python/megengine/module/activation.py @@ -0,0 +1,231 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +from ..functional import leaky_relu, prelu, relu, sigmoid, softmax +from ..tensor_nn import Parameter +from .module import Module + + +class Softmax(Module): + r""" + Applies a softmax function. Softmax is defined as: + + .. math:: + \text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)} + + It is applied to an n-dimensional input Tensor and rescaling them so that the elements of the + n-dimensional output Tensor lie in the range of `[0, 1]` and sum to 1. + + :param axis: An axis along which softmax will be applied. By default, + softmax will apply along the highest ranked axis. + + Examples: + + .. testcode:: + + import numpy as np + import megengine as mge + import megengine.module as M + + data = mge.tensor(np.array([-2,-1,0,1,2]).astype(np.float32)) + softmax = M.Softmax() + output = softmax(data) + with np.printoptions(precision=6): + print(output.numpy()) + + Outputs: + + .. testoutput:: + + [0.011656 0.031685 0.086129 0.234122 0.636409] + + """ + + def __init__(self, axis=None): + super().__init__() + self.axis = axis + + def forward(self, inputs): + return softmax(inputs, self.axis) + + +class Sigmoid(Module): + r""" + Applies the element-wise function: + + .. math:: + \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)} + + Examples: + + .. testcode:: + + import numpy as np + import megengine as mge + import megengine.module as M + + data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) + sigmoid = M.Sigmoid() + output = sigmoid(data) + with np.printoptions(precision=6): + print(output.numpy()) + + Outputs: + + .. testoutput:: + + [0.119203 0.268941 0.5 0.731059 0.880797] + + """ + + def forward(self, inputs): + return sigmoid(inputs) + + +class ReLU(Module): + r""" + Applies the element-wise function: + + .. math:: + \text{ReLU}(x) = \max(x, 0) + + Examples: + + .. testcode:: + + import numpy as np + import megengine as mge + import megengine.module as M + data = mge.tensor(np.array([-2,-1,0,1,2,]).astype(np.float32)) + relu = M.ReLU() + output = relu(data) + with np.printoptions(precision=6): + print(output.numpy()) + + Outputs: + + .. testoutput:: + + [0. 0. 0. 1. 2.] 
+
+    """
+
+    def forward(self, x):
+        return relu(x)
+
+
+class PReLU(Module):
+    r"""
+    Applies the element-wise function:
+
+    .. math::
+        \text{PReLU}(x) = \max(0,x) + a * \min(0,x)
+
+    or
+
+    .. math::
+        \text{PReLU}(x) =
+        \begin{cases}
+        x, & \text{ if } x \geq 0 \\
+        ax, & \text{ otherwise }
+        \end{cases}
+
+    Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses
+    a single parameter :math:`a` across all input channels. If called with `PReLU(num_of_channels)`,
+    a separate :math:`a` is used for each input channel.
+
+    :param num_parameters: number of :math:`a` to learn; only two
+        values are legitimate: 1, or the number of channels at input. Default: 1
+    :param init: the initial value of :math:`a`. Default: 0.25
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        import megengine as mge
+        import megengine.module as M
+        data = mge.tensor(np.array([-1.2, -3.7, 2.7]).astype(np.float32))
+        prelu = M.PReLU()
+        output = prelu(data)
+        print(output.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [-0.3 -0.925 2.7 ]
+
+    """
+
+    def __init__(self, num_parameters: int = 1, init: float = 0.25):
+        super().__init__()
+        self.num_parameters = num_parameters
+        if num_parameters > 1:
+            # Assume format is NCHW
+            self.weight = Parameter(
+                data=np.full((1, num_parameters, 1, 1), init, dtype=np.float32)
+            )
+        else:
+            self.weight = Parameter(data=[init])
+
+    def forward(self, inputs):
+        assert self.weight.shape == (1,) or self.weight.shape == (
+            1,
+            int(inputs.shape[1]),
+            1,
+            1,
+        ), "invalid weight's shape"
+        return prelu(inputs, self.weight)
+
+
+class LeakyReLU(Module):
+    r"""
+    Applies the element-wise function:
+
+    .. math::
+        \text{LeakyReLU}(x) = \max(0,x) + negative\_slope \times \min(0,x)
+
+    or
+
+    .. math::
+        \text{LeakyReLU}(x) =
+        \begin{cases}
+        x, & \text{ if } x \geq 0 \\
+        negative\_slope \times x, & \text{ otherwise }
+        \end{cases}
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        import megengine as mge
+        import megengine.module as M
+        data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32))
+
+        leakyrelu = M.LeakyReLU(0.01)
+        output = leakyrelu(data)
+        print(output.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [-0.08 -0.12 6. 10. ]
+
+    """
+
+    def __init__(self, negative_slope: float = 0.01):
+        super().__init__()
+        self.negative_slope = negative_slope
+
+    def forward(self, inputs):
+        return leaky_relu(inputs, self.negative_slope)
diff --git a/imperative/python/megengine/module/batchnorm.py b/imperative/python/megengine/module/batchnorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac154e1cfdf1adfdcc68023a77f984e08b1bfcd5
--- /dev/null
+++ b/imperative/python/megengine/module/batchnorm.py
@@ -0,0 +1,281 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from typing import Optional
+
+import numpy as np
+
+from ..distributed.group import WORLD, Group
+from ..functional import batch_norm2d, sync_batch_norm
+from ..tensor_nn import Buffer, Parameter
+from .
import init +from .module import Module + + +class _BatchNorm(Module): + def __init__( + self, + num_features, + eps=1e-5, + momentum=0.9, + affine=True, + track_running_stats=True, + freeze=False, + ): + super(_BatchNorm, self).__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.affine = affine + self.track_running_stats = track_running_stats + self._track_running_stats_saved = track_running_stats + self.freeze = freeze + if self.affine: + self.weight = Parameter(np.ones(num_features, dtype=np.float32)) + self.bias = Parameter(np.zeros(num_features, dtype=np.float32)) + else: + self.weight = None + self.bias = None + + tshape = (1, self.num_features, 1, 1) + + if self.track_running_stats: + self.running_mean = Buffer(np.zeros(tshape, dtype=np.float32)) + self.running_var = Buffer(np.ones(tshape, dtype=np.float32)) + else: + self.running_mean = None + self.running_var = None + + def reset_running_stats(self) -> None: + if self.track_running_stats: + init.zeros_(self.running_mean) + init.ones_(self.running_var) + + def reset_parameters(self) -> None: + self.reset_running_stats() + if self.affine: + init.ones_(self.weight) + init.zeros_(self.bias) + + def _check_input_ndim(self, inp): + raise NotImplementedError + + def forward(self, inp): + self._check_input_ndim(inp) + if self._track_running_stats_saved == False: + assert ( + self.track_running_stats == False + ), "track_running_stats can not be initilized to False and changed to True later" + + _ndims = len(inp.shape) + if _ndims != 4: + origin_shape = inp.shapeof() + if _ndims == 2: + n, c = inp.shapeof(0), inp.shapeof(1) + new_shape = (n, c, 1, 1) + elif _ndims == 3: + n, c, h = inp.shapeof(0), inp.shapeof(1), inp.shapeof(2) + new_shape = (n, c, h, 1) + + inp = inp.reshape(new_shape) + + if self.freeze and self.training and self._track_running_stats_saved: + scale = self.weight.reshape(1, -1, 1, 1) * ( + self.running_var + self.eps + ) ** (-0.5) + bias = self.bias.reshape(1, -1, 1, 1) - self.running_mean * scale + return inp * scale.detach() + bias.detach() + + if self.training and self.track_running_stats: + exponential_average_factor = self.momentum + else: + exponential_average_factor = 0.0 # useless + + output = batch_norm2d( + inp, + self.running_mean if self.track_running_stats else None, + self.running_var if self.track_running_stats else None, + self.weight, + self.bias, + training=self.training + or ((self.running_mean is None) and (self.running_var is None)), + momentum=exponential_average_factor, + eps=self.eps, + ) + + if _ndims != 4: + output = output.reshape(origin_shape) + + return output + + +class SyncBatchNorm(_BatchNorm): + r""" + Applies Synchronization Batch Normalization. 
+ """ + + def __init__( + self, + num_features, + eps=1e-5, + momentum=0.9, + affine=True, + track_running_stats=True, + freeze=False, + group: Optional[Group] = None, + ) -> None: + super().__init__( + num_features, eps, momentum, affine, track_running_stats, freeze + ) + self.group = group + + def _check_input_ndim(self, inp): + if len(inp.shape) not in {2, 3, 4}: + raise ValueError( + "expected 2D, 3D or 4D input (got {}D input)".format(len(inp.shape)) + ) + + def forward(self, inp): + self._check_input_ndim(inp) + + _ndims = len(inp.shape) + if _ndims != 4: + origin_shape = inp.shapeof() + if _ndims == 2: + n, c = inp.shapeof(0), inp.shapeof(1) + new_shape = (n, c, 1, 1) + elif _ndims == 3: + n, c, h = inp.shapeof(0), inp.shapeof(1), inp.shapeof(2) + new_shape = (n, c, h, 1) + + inp = inp.reshape(new_shape) + + if self.training and self.track_running_stats: + exponential_average_factor = self.momentum + else: + exponential_average_factor = 0.0 # useless + + output = sync_batch_norm( + inp, + self.running_mean, + self.running_var, + self.weight, + self.bias, + self.training or not self.track_running_stats, + exponential_average_factor, + self.eps, + group=self.group, + ) + + if _ndims != 4: + output = output.reshape(origin_shape) + + return output + + +class BatchNorm1d(_BatchNorm): + r""" + Applies Batch Normalization over a 2D/3D tensor. + + Refer to :class:`~.BatchNorm2d` for more information. + """ + + def _check_input_ndim(self, inp): + if len(inp.shape) not in {2, 3}: + raise ValueError( + "expected 2D or 3D input (got {}D input)".format(len(inp.shape)) + ) + + +class BatchNorm2d(_BatchNorm): + r""" + Applies Batch Normalization over a 4D tensor. + + .. math:: + + y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and :math:`\gamma` and :math:`\beta` are learnable + parameter vectors. + + By default, during training this layer keeps running estimates of its + computed mean and variance, which are then used for normalization during + evaluation. The running estimates are kept with a default :attr:`momentum` + of 0.9. + + If :attr:`track_running_stats` is set to ``False``, this layer will not + keep running estimates, and batch statistics are instead used during + evaluation time. + + .. note:: + This :attr:`momentum` argument is different from one used in optimizer + classes and the conventional notion of momentum. Mathematically, the + update rule for running statistics here is + :math:`\hat{x}_\text{new} = \text{momentum} \times \hat{x} + (1 - \text{momentum}) \times x_t`, + where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the + new observed value. + + Because the Batch Normalization is done over the `C` dimension, computing + statistics on `(N, H, W)` slices, it's common terminology to call this + Spatial Batch Normalization. + + :type num_features: int + :param num_features: usually the :math:`C` from an input of size + :math:`(N, C, H, W)` or the highest ranked dimension of an input with + less than 4D. + :type eps: float + :param eps: a value added to the denominator for numerical stability. + Default: 1e-5. + :type momentum: float + :param momentum: the value used for the `running_mean` and `running_var` + computation. + Default: 0.9 + :type affine: bool + :param affine: a boolean value that when set to ``True``, this module has + learnable affine parameters. 
Default: ``True``
+    :type track_running_stats: bool
+    :param track_running_stats: when set to ``True``, this module tracks the
+        running mean and variance. When set to ``False``, this module does not
+        track such statistics and always uses batch statistics in both training
+        and eval modes. Default: ``True``.
+
+    :type freeze: bool
+    :param freeze: when set to ``True``, this module does not update the
+        running mean and variance, and uses the running mean and variance instead of
+        the batch mean and batch variance to normalize the input. The parameter takes effect
+        only when the module is initialized with ``track_running_stats`` as ``True`` and
+        the module is in training mode.
+        Default: ``False``.
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        import megengine as mge
+        import megengine.module as M
+
+        # With Learnable Parameters
+        m = M.BatchNorm2d(4)
+        inp = mge.tensor(np.random.rand(1, 4, 3, 3).astype("float32"))
+        oup = m(inp)
+        print(m.weight, m.bias)
+        # Without Learnable Parameters
+        m = M.BatchNorm2d(4, affine=False)
+        oup = m(inp)
+        print(m.weight, m.bias)
+
+    .. testoutput::
+
+        Tensor([1. 1. 1. 1.]) Tensor([0. 0. 0. 0.])
+        None None
+    """
+
+    def _check_input_ndim(self, inp):
+        if len(inp.shape) != 4:
+            raise ValueError("expected 4D input (got {}D input)".format(len(inp.shape)))
diff --git a/imperative/python/megengine/module/concat.py b/imperative/python/megengine/module/concat.py
new file mode 100644
index 0000000000000000000000000000000000000000..7eca519c549636265e542d6d97fae0c8c98cfc66
--- /dev/null
+++ b/imperative/python/megengine/module/concat.py
@@ -0,0 +1,22 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from typing import Iterable
+
+from ..functional import concat
+from ..tensor import Tensor
+from .module import Module
+
+
+class Concat(Module):
+    r"""
+    A :class:`~.Module` to do functional concat. Could be replaced with :class:`~.QATModule`
+    version :class:`~.qat.concat.Concat` using :func:`~.quantize.quantize_qat`.
+    """
+
+    def forward(self, inps: Iterable[Tensor], axis: int = 0):
+        return concat(inps, axis)
diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..87bdc244e3b564bfd6c784a3456c4bc6379b808b
--- /dev/null
+++ b/imperative/python/megengine/module/conv.py
@@ -0,0 +1,391 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from abc import abstractmethod
+from typing import Tuple, Union
+
+import numpy as np
+
+from ..core.ops._internal import param_defs as P
+from ..functional import conv2d, conv_transpose2d, local_conv2d, relu
+from ..functional.types import _pair, _pair_nonzero
+from ..tensor_nn import Parameter
+from .
import init +from .module import Module + + +class _ConvNd(Module): + """base class for convolution modules, including transposed conv""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]], + padding: Union[int, Tuple[int, int]], + dilation: Union[int, Tuple[int, int]], + groups: int, + bias: bool = True, + ): + super().__init__() + if in_channels % groups != 0: + raise ValueError("in_channels must be divisible by groups") + if out_channels % groups != 0: + raise ValueError("out_channels must be divisible by groups") + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + + self.weight = Parameter(np.zeros(self._infer_weight_shape(), dtype=np.float32)) + self.bias = None + if bias: + self.bias = Parameter(np.zeros(self._infer_bias_shape(), dtype=np.float32)) + self.reset_parameters() + + @abstractmethod + def _get_fanin(self): + pass + + def reset_parameters(self) -> None: + fanin = self._get_fanin() + std = np.sqrt(1 / fanin) + init.normal_(self.weight, 0.0, std) + if self.bias is not None: + init.zeros_(self.bias) + + @abstractmethod + def _infer_weight_shape(self): + pass + + @abstractmethod + def _infer_bias_shape(self): + pass + + +class Conv2d(_ConvNd): + r"""Applies a 2D convolution over an input tensor. + + For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`, + this layer generates an output of the size + :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` through the + process described as below: + + .. math:: + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k) + + where :math:`\star` is the valid 2D cross-correlation operator, + :math:`N` is a batch size, :math:`C` denotes a number of channels, + :math:`H` is a height of input planes in pixels, and :math:`W` is + width in pixels. + + When ``groups == in_channels`` and ``out_channels == K * in_channels``, + where `K` is a positive integer, this operation is also known as depthwise + convolution. + + In other words, for an input of size :math:`(N, C_{in}, H_{in}, W_{in})`, + a depthwise convolution with a depthwise multiplier `K`, can be constructed + by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. + + :param in_channels: number of input channels. + :param out_channels: number of output channels. + :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is + an :class:`int`, the actual kernel size would be + ``(kernel_size, kernel_size)``. Default: 1 + :param stride: stride of the 2D convolution operation. Default: 1 + :param padding: size of the paddings added to the input on both sides of its + spatial dimensions. Only zero-padding is supported. Default: 0 + :param dilation: dilation of the 2D convolution operation. Default: 1 + :param groups: number of groups to divide input and output channels into, + so as to perform a "grouped convolution". When ``groups`` is not 1, + ``in_channels`` and ``out_channels`` must be divisible by ``groups``, + and there would be an extra dimension at the beginning of the weight's + shape. Specifically, the shape of weight would be ``(groups, + out_channel // groups, in_channels // groups, *kernel_size)``. 
+    :param bias: whether to add a bias onto the result of convolution. Default:
+        True
+    :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default:
+        `CROSS_CORRELATION`.
+    :param compute_mode: When set to `DEFAULT`, no special requirements will be
+        placed on the precision of intermediate results. When set to `FLOAT32`,
+        float32 would be used for accumulator and intermediate result, but only
+        effective when input and output are of float16 dtype.
+    """
+
+    _conv_mode_type = P.Convolution.Mode
+    _compute_mode_type = P.Convolution.ComputeMode
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Union[int, Tuple[int, int]],
+        stride: Union[int, Tuple[int, int]] = 1,
+        padding: Union[int, Tuple[int, int]] = 0,
+        dilation: Union[int, Tuple[int, int]] = 1,
+        groups: int = 1,
+        bias: bool = True,
+        conv_mode: str = "CROSS_CORRELATION",
+        compute_mode: str = "DEFAULT",
+    ):
+        kernel_size = _pair_nonzero(kernel_size)
+        stride = _pair_nonzero(stride)
+        padding = _pair(padding)
+        dilation = _pair_nonzero(dilation)
+        self.conv_mode = self._conv_mode_type.convert(conv_mode)
+        self.compute_mode = self._compute_mode_type.convert(compute_mode)
+        super().__init__(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+        )
+
+    def _get_fanin(self):
+        kh, kw = self.kernel_size
+        ic = self.in_channels
+        return kh * kw * ic
+
+    def _infer_weight_shape(self):
+        group = self.groups
+        ichl = self.in_channels
+        ochl = self.out_channels
+        kh, kw = self.kernel_size
+        if group == 1:
+            # Assume format is NCHW
+            return (ochl, ichl, kh, kw)
+
+        assert (
+            ichl % group == 0 and ochl % group == 0
+        ), "invalid config: input_channels={} output_channels={} group={}".format(
+            ichl, ochl, group
+        )
+        # Assume format is NCHW
+        return (group, ochl // group, ichl // group, kh, kw)
+
+    def _infer_bias_shape(self):
+        # Assume format is NCHW
+        return (1, self.out_channels, 1, 1)
+
+    def calc_conv(self, inp, weight, bias):
+        return conv2d(
+            inp,
+            weight,
+            bias,
+            self.stride,
+            self.padding,
+            self.dilation,
+            self.groups,
+            self.conv_mode,
+            self.compute_mode,
+        )
+
+    def forward(self, inp):
+        return self.calc_conv(inp, self.weight, self.bias)
+
+
+class ConvTranspose2d(_ConvNd):
+    r"""Applies a 2D transposed convolution over an input tensor.
+
+    This module is also known as a deconvolution or a fractionally-strided convolution.
+    :class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation
+    with respect to its input.
+
+    Convolution usually reduces the size of input, while transposed convolution works
+    the opposite way, transforming a smaller input to a larger output while preserving the
+    connectivity pattern.
+
+    :param in_channels: number of input channels.
+    :param out_channels: number of output channels.
+    :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is
+        an :class:`int`, the actual kernel size would be
+        ``(kernel_size, kernel_size)``. Default: 1
+    :param stride: stride of the 2D convolution operation. Default: 1
+    :param padding: size of the paddings added to the input on both sides of its
+        spatial dimensions. Only zero-padding is supported. Default: 0
+    :param dilation: dilation of the 2D convolution operation. Default: 1
+    :param groups: number of groups to divide input and output channels into,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
+        ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
+        and there would be an extra dimension at the beginning of the weight's
+        shape. Specifically, the shape of weight would be ``(groups,
+        out_channels // groups, in_channels // groups, *kernel_size)``. Default: 1
+    :param bias: whether to add a bias onto the result of convolution. Default:
+        True
+    :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default:
+        `CROSS_CORRELATION`.
+    :param compute_mode: When set to `DEFAULT`, no special requirements will be
+        placed on the precision of intermediate results. When set to `FLOAT32`,
+        float32 would be used for accumulator and intermediate result, but only
+        effective when input and output are of float16 dtype.
+    """
+
+    _conv_mode_type = P.Convolution.Mode
+    _compute_mode_type = P.Convolution.ComputeMode
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Union[int, Tuple[int, int]],
+        stride: Union[int, Tuple[int, int]] = 1,
+        padding: Union[int, Tuple[int, int]] = 0,
+        dilation: Union[int, Tuple[int, int]] = 1,
+        groups: int = 1,
+        bias: bool = True,
+        conv_mode: str = "CROSS_CORRELATION",
+        compute_mode: str = "DEFAULT",
+    ):
+        kernel_size = _pair_nonzero(kernel_size)
+        stride = _pair_nonzero(stride)
+        padding = _pair(padding)
+        dilation = _pair_nonzero(dilation)
+        self.conv_mode = self._conv_mode_type.convert(conv_mode)
+        self.compute_mode = self._compute_mode_type.convert(compute_mode)
+        super().__init__(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+        )
+
+    def _get_fanin(self):
+        kh, kw = self.kernel_size
+        oc = self.out_channels
+        return kh * kw * oc
+
+    def _infer_weight_shape(self):
+        group = self.groups
+        ichl = self.in_channels
+        ochl = self.out_channels
+        kh, kw = self.kernel_size
+        if group == 1:
+            # Assume format is NCHW
+            return (ichl, ochl, kh, kw)
+
+        assert (
+            ichl % group == 0 and ochl % group == 0
+        ), "invalid config: input_channels={} output_channels={} group={}".format(
+            ichl, ochl, group
+        )
+        # Assume format is NCHW
+        return (group, ichl // group, ochl // group, kh, kw)
+
+    def _infer_bias_shape(self):
+        # Assume format is NCHW
+        return (1, self.out_channels, 1, 1)
+
+    def forward(self, inp):
+        return conv_transpose2d(
+            inp,
+            self.weight,
+            self.bias,
+            self.stride,
+            self.padding,
+            self.dilation,
+            self.groups,
+            self.conv_mode,
+            self.compute_mode,
+        )
+
+
+class LocalConv2d(Conv2d):
+    r"""Applies a spatial convolution with untied kernels over an input 4D tensor.
+    It is also known as the locally connected layer.
+
+    :param in_channels: number of input channels.
+    :param out_channels: number of output channels.
+    :param input_height: the height of the input images.
+    :param input_width: the width of the input images.
+    :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is
+        an :class:`int`, the actual kernel size would be
+        ``(kernel_size, kernel_size)``. Default: 1
+    :param stride: stride of the 2D convolution operation. Default: 1
+    :param padding: size of the paddings added to the input on both sides of its
+        spatial dimensions. Only zero-padding is supported. Default: 0
+    :param groups: number of groups to divide input and output channels into,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
+        ``in_channels`` and ``out_channels`` must be divisible by ``groups``.
+ The shape of weight is ``(groups, output_height, output_width, + in_channels // groups, *kernel_size, out_channels // groups)``. + """ + + _conv_mode_type = P.Convolution.Mode + + def __init__( + self, + in_channels: int, + out_channels: int, + input_height: int, + input_width: int, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + conv_mode: str = "CROSS_CORRELATION", + ): + self.input_height = input_height + self.input_width = input_width + super().__init__( + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + groups, + bias=False, + ) + + def _infer_weight_shape(self): + group = self.groups + output_height = ( + self.input_height + self.padding[0] * 2 - self.kernel_size[0] + ) // self.stride[0] + 1 + output_width = ( + self.input_width + self.padding[1] * 2 - self.kernel_size[1] + ) // self.stride[1] + 1 + # Assume format is NCHW + return ( + group, + output_height, + output_width, + self.in_channels // group, + self.kernel_size[0], + self.kernel_size[1], + self.out_channels // group, + ) + + def forward(self, inp): + return local_conv2d( + inp, self.weight, self.stride, self.padding, self.dilation, self.conv_mode + ) + + +class ConvRelu2d(Conv2d): + r""" + A fused :class:`~.Module` including Conv2d and relu. Could be replaced + with :class:`~.QATModule` version :class:`~.qat.conv.ConvRelu2d` using + :func:`~.quantize.quantize_qat`. + """ + + def forward(self, inp): + return relu(self.calc_conv(inp, self.weight, self.bias)) diff --git a/imperative/python/megengine/module/conv_bn.py b/imperative/python/megengine/module/conv_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..76713b0f81e502900de5ce34b2faa96ddda595a2 --- /dev/null +++ b/imperative/python/megengine/module/conv_bn.py @@ -0,0 +1,69 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Tuple, Union + +from ..functional import relu +from .batchnorm import BatchNorm2d +from .conv import Conv2d +from .module import Module + + +class _ConvBnActivation2d(Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + bias: bool = True, + conv_mode: str = "CROSS_CORRELATION", + compute_mode: str = "DEFAULT", + eps=1e-5, + momentum=0.9, + affine=True, + track_running_stats=True, + ): + super().__init__() + self.conv = Conv2d( + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + groups, + bias, + conv_mode, + compute_mode, + ) + self.bn = BatchNorm2d(out_channels, eps, momentum, affine, track_running_stats) + + +class ConvBn2d(_ConvBnActivation2d): + r""" + A fused :class:`~.Module` including Conv2d, BatchNorm2d. Could be replaced + with :class:`~.QATModule` version :class:`~.qat.conv_bn.ConvBn2d` using + :func:`~.quantize.quantize_qat`. 
+ """ + + def forward(self, inp): + return self.bn(self.conv(inp)) + + +class ConvBnRelu2d(_ConvBnActivation2d): + r""" + A fused :class:`~.Module` including Conv2d, BatchNorm2d and relu. Could be replaced + with :class:`~.QATModule` version :class:`~.qat.conv_bn.ConvBnRelu2d` using + :func:`~.quantize.quantize_qat`. + """ + + def forward(self, inp): + return relu(self.bn(self.conv(inp))) diff --git a/imperative/python/megengine/module/dropout.py b/imperative/python/megengine/module/dropout.py new file mode 100644 index 0000000000000000000000000000000000000000..146eba24544bd713e3c2210a78e1466317012ba6 --- /dev/null +++ b/imperative/python/megengine/module/dropout.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from ..functional import dropout +from .module import Module + + +class Dropout(Module): + r"""Randomly set input elements to zeros with the probability :math:`drop\_prob` during training. Commonly used in large networks to prevent overfitting. + Note that we perform dropout only during training, we also rescale(multiply) the output tensor + by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. + + :param drop_prob: The probability to drop (set to zero) each single element + """ + + def __init__(self, drop_prob=0.0): + super().__init__() + self.drop_prob = drop_prob + + def forward(self, inputs): + if self.training: + return dropout(inputs, self.drop_prob, rescale=True) + else: + return inputs diff --git a/imperative/python/megengine/module/elemwise.py b/imperative/python/megengine/module/elemwise.py new file mode 100644 index 0000000000000000000000000000000000000000..041f56a5de80873e2a85ec92dd1b32e6c7936227 --- /dev/null +++ b/imperative/python/megengine/module/elemwise.py @@ -0,0 +1,79 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from ..core.ops._internal import param_defs as P +from ..functional.elemwise import _elwise +from ..tensor import Tensor +from .module import Module + + +class Elemwise(Module): + r""" + A :class:`~.Module` to do elemwise operator. Could be replaced with :class:`~.QATModule` + version :class:`~.qat.elemwise.Elemwise` using :func:`~.quantize.quantize_qat`. + + :param method: the elemwise method, support the following string. + It will do the normal elemwise operator for float. + + * "ADD": a + b + * "FUSE_ADD_RELU": max(x+y, 0) + * "MUL": x * y + * "MIN": min(x, y) + * "MAX": max(x, y) + * "SUB": x - y + * "TRUE_DIV": x / y + * "FUSE_ADD_SIGMOID": sigmoid(x + y) + * "FUSE_ADD_TANH": tanh(x + y) + * "RELU": x > 0 ? x : 0 + * "ABS": x > 0 ? 
x : -x
+        * "SIGMOID": sigmoid(x)
+        * "EXP": exp(x)
+        * "TANH": tanh(x)
+        * "FUSE_MUL_ADD3": x * y + z
+        * "FAST_TANH": fast_tanh(x)
+        * "NEGATE": -x
+        * "ACOS": acos(x)
+        * "ASIN": asin(x)
+        * "CEIL": ceil(x)
+        * "COS": cos(x)
+        * "EXPM1": expm1(x)
+        * "FLOOR": floor(x)
+        * "LOG": log(x)
+        * "LOG1P": log1p(x)
+        * "SIN": sin(x)
+        * "ROUND": round(x)
+        * "ERF": erf(x)
+        * "ERFINV": erfinv(x)
+        * "ERFC": erfc(x)
+        * "ERFCINV": erfcinv(x)
+        * "ABS_GRAD": abs_grad
+        * "FLOOR_DIV": floor_div
+        * "MOD": mod
+        * "SIGMOID_GRAD": sigmoid_grad
+        * "SWITCH_GT0": switch_gt0
+        * "TANH_GRAD": tanh_grad
+        * "LT": lt
+        * "LEQ": leq
+        * "EQ": eq
+        * "POW": pow
+        * "LOG_SUM_EXP": log_sum_exp
+        * "FAST_TANH_GRAD": fast_tanh_grad
+        * "ATAN2": atan2
+        * "COND_LEQ_MOV": cond_leq_mov
+        * "H_SWISH": h_swish
+        * "FUSE_ADD_H_SWISH": h_swish(x+y)
+        * "H_SWISH_GRAD": h_swish_grad
+    """
+
+    _elemwise_mode_type = P.Elemwise.Mode
+
+    def __init__(self, method):
+        super().__init__()
+        self.method = self._elemwise_mode_type.convert(method)
+
+    def forward(self, *inps):
+        return _elwise(*inps, mode=self.method)
diff --git a/imperative/python/megengine/module/embedding.py b/imperative/python/megengine/module/embedding.py
new file mode 100644
index 0000000000000000000000000000000000000000..15c196517cefaecb0e146707affde9affdccf098
--- /dev/null
+++ b/imperative/python/megengine/module/embedding.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from typing import Optional
+
+import numpy as np
+
+from ..functional import embedding as embedding_func
+from ..tensor_nn import Parameter
+from . import init
+from .module import Module
+
+
+class Embedding(Module):
+    r"""
+    A simple lookup table that stores embeddings of a fixed dictionary and size.
+
+    This module is often used to store word embeddings and retrieve them using indices.
+    The input to the module is a list of indices, and the output is the corresponding word embeddings.
+    The indices should be less than num_embeddings.
+
+    :param num_embeddings: size of embedding dictionary.
+    :param embedding_dim: size of each embedding vector.
+    :param padding_idx: should be set to None, not supported now.
+    :param max_norm: should be set to None, not supported now.
+    :param norm_type: should be set to None, not supported now.
+    :param initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim).
+
+    Examples:
+
+    .. testcode::
+
+        import numpy as np
+        import megengine as mge
+        import megengine.module as M
+        weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6),(0.1,1.1,2.1,3.1,4.1)], dtype=np.float32))
+        data = mge.tensor(np.array([(0,1,1),(1,0,1),(0,0,1)], dtype=np.int32))
+
+        embedding = M.Embedding(2, 5, initial_weight=weight)
+        output = embedding(data)
+        with np.printoptions(precision=6):
+            print(output.numpy())
+
+    Outputs:
+
+    .. testoutput::
testoutput:: + + [[[1.2 2.3 3.4 4.5 5.6] + [0.1 1.1 2.1 3.1 4.1] + [0.1 1.1 2.1 3.1 4.1]] + + [[0.1 1.1 2.1 3.1 4.1] + [1.2 2.3 3.4 4.5 5.6] + [0.1 1.1 2.1 3.1 4.1]] + + [[1.2 2.3 3.4 4.5 5.6] + [1.2 2.3 3.4 4.5 5.6] + [0.1 1.1 2.1 3.1 4.1]]] + + """ + + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: Optional[int] = None, + max_norm: Optional[float] = None, + norm_type: Optional[float] = None, + initial_weight: Parameter = None, + ): + super().__init__() + if padding_idx is not None: + raise ValueError("Not support padding index now.") + if max_norm is not None or norm_type is not None: + raise ValueError("Not support weight normalize now.") + self.padding_idx = padding_idx + self.max_norm = max_norm + self.norm_type = norm_type + self.num_embeddings = num_embeddings + self.embedding_dim = embedding_dim + if initial_weight is None: + self.weight = Parameter( + np.random.uniform( + size=(self.num_embeddings, self.embedding_dim) + ).astype(np.float32) + ) + self.reset_parameters() + else: + if initial_weight.shape != (num_embeddings, embedding_dim): + raise ValueError( + "The weight shape should match num_embeddings and embedding_dim" + ) + self.weight = Parameter(initial_weight.numpy()) + + def reset_parameters(self) -> None: + init.normal_(self.weight) + + def forward(self, inputs): + return embedding_func(inputs, self.weight) + + @classmethod + def from_pretrained( + cls, + embeddings: Parameter, + freeze: Optional[bool] = True, + padding_idx: Optional[int] = None, + max_norm: Optional[float] = None, + norm_type: Optional[float] = None, + ): + r""" + Creates Embedding instance from given 2-dimensional FloatTensor. + + :param embeddings: Tensor contained weight for the embedding. + :param freeze: If ``True``, the weight does not get updated during the learning process. Default: ``True``. + :param padding_idx: should be set to None, not support Now. + :param max_norm: should be set to None, not support Now. + :param norm_type: should be set to None, not support Now. + + Examples: + + .. testcode:: + + import numpy as np + import megengine as mge + import megengine.module as M + weight = mge.tensor(np.array([(1.2,2.3,3.4,4.5,5.6),(0.1,1.1,2.1,3.1,4.1)], dtype=np.float32)) + data = mge.tensor(np.array([(0,1,1),(1,0,1),(0,0,1)], dtype=np.int32)) + + embedding = M.Embedding.from_pretrained(weight, freeze=False) + output = embedding(data) + print(output.numpy()) + + Outputs: + + .. 
testoutput:: + + [[[1.2 2.3 3.4 4.5 5.6] + [0.1 1.1 2.1 3.1 4.1] + [0.1 1.1 2.1 3.1 4.1]] + + [[0.1 1.1 2.1 3.1 4.1] + [1.2 2.3 3.4 4.5 5.6] + [0.1 1.1 2.1 3.1 4.1]] + + [[1.2 2.3 3.4 4.5 5.6] + [1.2 2.3 3.4 4.5 5.6] + [0.1 1.1 2.1 3.1 4.1]]] + + + """ + embeddings_shape = embeddings.shape + embeddings_dim = len(embeddings_shape) + if embeddings_dim != 2: + raise ValueError("Embeddings parameter is expected to be 2-dimensional") + rows = embeddings_shape[0] + cols = embeddings_shape[1] + embedding = cls( + num_embeddings=rows, + embedding_dim=cols, + initial_weight=embeddings, + padding_idx=padding_idx, + max_norm=max_norm, + norm_type=norm_type, + ) + embedding.weight.requires_grad = not freeze + return embedding diff --git a/imperative/python/megengine/module/external.py b/imperative/python/megengine/module/external.py new file mode 100644 index 0000000000000000000000000000000000000000..387125c412642f4a69e178009b3538e8e5e47543 --- /dev/null +++ b/imperative/python/megengine/module/external.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +from ..functional import cambricon_subgraph, extern_opr_subgraph +from .module import Module + + +class CambriconSubgraph(Module): + r"""Load a serialized Cambricon subgraph. + + See :func:`~.cambricon_subgraph` for more details. + """ + + def __init__( + self, data, symbol, tensor_dim_mutable, + ): + super(CambriconSubgraph, self).__init__() + self._data = data + self.symbol = symbol + self.tensor_dim_mutable = tensor_dim_mutable + + @property + def data(self): + return self._data.tobytes() + + @data.setter + def data(self, val): + self._data = np.frombuffer(val, dtype=np.uint8) + + def forward(self, inputs): + outputs = cambricon_subgraph( + inputs, self._data, self.symbol, self.tensor_dim_mutable, + ) + return outputs + + +class ExternOprSubgraph(Module): + r"""Load a serialized extern opr subgraph. + """ + + def __init__(self, data, name, output_shapes): + super(ExternOprSubgraph, self).__init__() + self.data = data + self.name = name + self.output_shapes = output_shapes + + def forward(self, inputs): + outputs = extern_opr_subgraph(inputs, self.output_shapes, self.name, self.data,) + return outputs diff --git a/imperative/python/megengine/module/identity.py b/imperative/python/megengine/module/identity.py new file mode 100644 index 0000000000000000000000000000000000000000..51b31e505370020a14744e39054979da5c197027 --- /dev/null +++ b/imperative/python/megengine/module/identity.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+from ..functional import identity +from .module import Module + + +class Identity(Module): + r"""A placeholder identity operator that will ignore any argument.""" + + def forward(self, x): + return identity(x) diff --git a/imperative/python/megengine/module/init.py b/imperative/python/megengine/module/init.py new file mode 100644 index 0000000000000000000000000000000000000000..c2cb50755a5f6fe0e0819d0fa3e87c57e6a73e80 --- /dev/null +++ b/imperative/python/megengine/module/init.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import math +from functools import reduce +from typing import Optional, Tuple, Union + +import numpy as np + +from ..tensor import Tensor + + +def fill_(tensor: Tensor, val: Union[float, int]) -> None: + """Fill the given ``tensor`` with value ``val``. + + :param tensor: An n-dimentional tensor to be initialized + :param val: The value to be filled throughout the tensor + """ + tensor.set_value(np.full(tensor.shape, val, tensor.dtype)) + + +def zeros_(tensor: Tensor) -> None: + """Fill the given ``tensor`` with scalar value `0`. + + :param tensor: An n-dimentional tensor to be initialized + """ + fill_(tensor, 0) + + +def ones_(tensor: Tensor) -> None: + """Fill the given ``tensor`` with the scalar value `1`. + + :param tensor: An n-dimentional tensor to be initialized + """ + fill_(tensor, 1) + + +def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: + r"""Fill the given ``tensor`` with random value sampled from uniform distribution + :math:`\mathcal{U}(\text{a}, \text{b})`. + + :param tensor: An n-dimentional tensor to be initialized + :param a: Lower bound of the sampling interval + :param b: Upper bound of the sampling interval + """ + tensor.set_value(np.random.uniform(a, b, tensor.shape).astype(tensor.dtype)) + + +def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: + r"""Fill the given ``tensor`` with random value sampled from normal distribution + :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. + + :param tensor: An n-dimentional tensor to be initialized + :param mean: The mean of the normal distribution + :param std: The standard deviation of the normal distribution + """ + tensor.set_value(np.random.normal(mean, std, tensor.shape).astype(np.float32)) + + +def calculate_gain( + nonlinearity: str, param: Optional[Union[int, float]] = None +) -> float: + r"""Return a recommended gain value (see the table below) for the given nonlinearity + function. + + ================= ==================================================== + nonlinearity gain + ================= ==================================================== + Linear / Identity :math:`1` + Conv{1,2,3}D :math:`1` + Sigmoid :math:`1` + Tanh :math:`\frac{5}{3}` + ReLU :math:`\sqrt{2}` + Leaky Relu :math:`\sqrt{\frac{2}{1 + \text{negative_{slope}}^2}}` + ================= ==================================================== + + :param nonlinearity: Name of the non-linear function + :param param: Optional parameter for leaky_relu. Only effective when + ``nonlinearity`` is "leaky_relu". 
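+
+    A small illustrative sketch (the values follow the table above; the
+    ``megengine.module`` alias is an assumption of the example):
+
+    .. code-block::
+
+        import megengine.module as M
+
+        M.init.calculate_gain("tanh")             # 5 / 3
+        M.init.calculate_gain("leaky_relu", 0.2)  # sqrt(2 / (1 + 0.2 ** 2))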
+
+    """
+    linear_fns = [
+        "linear",
+        "conv1d",
+        "conv2d",
+        "conv3d",
+        "conv_transpose1d",
+        "conv_transpose2d",
+        "conv_transpose3d",
+    ]
+    if nonlinearity in linear_fns or nonlinearity == "sigmoid":
+        return 1
+    if nonlinearity == "tanh":
+        return 5.0 / 3
+    if nonlinearity == "relu":
+        return math.sqrt(2.0)
+    if nonlinearity == "leaky_relu":
+        if param is None:
+            negative_slope = 0.01
+        elif (
+            not isinstance(param, bool)
+            and isinstance(param, int)
+            or isinstance(param, float)
+        ):
+            # True/False are instances of int, hence the check above
+            negative_slope = param
+        else:
+            raise ValueError("negative_slope {} not a valid number".format(param))
+        return math.sqrt(2.0 / (1 + negative_slope ** 2))
+    raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+
+
+def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]:
+    """
+    Calculate fan_in / fan_out value for given weight tensor. This function assumes
+    input tensor is stored in NCHW format.
+
+    :param tensor: Weight tensor in NCHW format
+    """
+    shape = tensor.shape
+    ndim = len(shape)
+    if ndim < 2:
+        raise ValueError(
+            "fan_in and fan_out cannot be computed for tensor with fewer than 2 "
+            "dimensions"
+        )
+
+    if ndim == 2:  # Linear
+        fan_in = shape[1]
+        fan_out = shape[0]
+    else:
+        num_input_fmaps = shape[1]
+        num_output_fmaps = shape[0]
+        receptive_field_size = 1
+        if ndim > 2:
+            receptive_field_size = reduce(lambda x, y: x * y, shape[2:], 1)
+        fan_in = num_input_fmaps * receptive_field_size
+        fan_out = num_output_fmaps * receptive_field_size
+    return fan_in, fan_out
+
+
+def calculate_correct_fan(tensor: Tensor, mode: str) -> float:
+    """
+    Calculate fan_in or fan_out value for given weight tensor, depending on given
+    ``mode``.
+
+    See :func:`calculate_fan_in_and_fan_out` for details.
+
+    :param tensor: Weight tensor in NCHW format
+    :param mode: ``'fan_in'`` or ``'fan_out'``
+    """
+    mode = mode.lower()
+    valid_modes = ["fan_in", "fan_out"]
+    if mode not in valid_modes:
+        raise ValueError(
+            "Mode {} not supported, please use one of {}".format(mode, valid_modes)
+        )
+
+    fan_in, fan_out = calculate_fan_in_and_fan_out(tensor)
+    return fan_in if mode == "fan_in" else fan_out
+
+
+def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None:
+    r"""Fill ``tensor`` with random values sampled from :math:`\mathcal{U}(-a, a)`
+    where
+
+    .. math::
+        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}
+
+    Also known as Glorot initialization. Detailed information can be retrieved from
+    `Understanding the difficulty of training deep feedforward neural networks` -
+    Glorot, X. & Bengio, Y. (2010).
+
+    :param tensor: An n-dimensional tensor to be initialized
+    :param gain: Scaling factor for :math:`a`.
+    """
+    fan_in, fan_out = calculate_fan_in_and_fan_out(tensor)
+    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
+    a = math.sqrt(3.0) * std
+    uniform_(tensor, -a, a)
+
+
+def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None:
+    r"""Fill ``tensor`` with random values sampled from
+    :math:`\mathcal{N}(0, \text{std}^2)` where
+
+    .. math::
+        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}
+
+    Also known as Glorot initialization. Detailed information can be retrieved from
+    `Understanding the difficulty of training deep feedforward neural networks` -
+    Glorot, X. & Bengio, Y. (2010).
+
+    :param tensor: An n-dimensional tensor to be initialized
+    :param gain: Scaling factor for :math:`std`.
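+
+    A minimal usage sketch (the shape is illustrative; like the other
+    initializers here, it mutates the tensor in place via ``set_value``):
+
+    .. code-block::
+
+        import numpy as np
+        import megengine as mge
+        import megengine.module as M
+
+        w = mge.tensor(np.zeros((128, 64), dtype=np.float32))
+        M.init.xavier_normal_(w, gain=1.0)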
+    """
+    fan_in, fan_out = calculate_fan_in_and_fan_out(tensor)
+    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
+    normal_(tensor, 0.0, std)
+
+
+def msra_uniform_(
+    tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu"
+) -> None:
+    r"""Fill ``tensor`` with random values sampled from
+    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
+
+    .. math::
+        \text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan\_in}}}
+
+    Detailed information can be retrieved from
+    `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
+    classification`
+
+    :param tensor: An n-dimensional tensor to be initialized
+    :param a: Optional parameter for calculating gain for leaky_relu. See
+        :func:`calculate_gain` for details.
+    :param mode: ``'fan_in'`` or ``'fan_out'``, used to calculate :math:`gain`, the
+        scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for
+        details.
+    :param nonlinearity: Name of the non-linear function used to calculate :math:`gain`.
+        See :func:`calculate_gain` for details.
+    """
+    fan = calculate_correct_fan(tensor, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    bound = math.sqrt(3.0) * std
+    uniform_(tensor, -bound, bound)
+
+
+def msra_normal_(
+    tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu"
+) -> None:
+    r"""Fill ``tensor`` with random values sampled from
+    :math:`\mathcal{N}(0, \text{std}^2)` where
+
+    .. math::
+        \text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan\_in}}}
+
+    Detailed information can be retrieved from
+    `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
+    classification`
+
+    :param tensor: An n-dimensional tensor to be initialized
+    :param a: Optional parameter for calculating gain for leaky_relu. See
+        :func:`calculate_gain` for details.
+    :param mode: ``'fan_in'`` or ``'fan_out'``, used to calculate :math:`gain`, the
+        scaling factor for :math:`std`. See :func:`calculate_fan_in_and_fan_out` for
+        details.
+    :param nonlinearity: Name of the non-linear function used to calculate :math:`gain`.
+        See :func:`calculate_gain` for details.
+    """
+    fan = calculate_correct_fan(tensor, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    normal_(tensor, 0, std)
diff --git a/imperative/python/megengine/module/linear.py b/imperative/python/megengine/module/linear.py
new file mode 100644
index 0000000000000000000000000000000000000000..34900a2e43b09ce7dcf0ab426d85942342cf72a4
--- /dev/null
+++ b/imperative/python/megengine/module/linear.py
@@ -0,0 +1,61 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import numpy as np
+
+from ..functional import linear
+from ..tensor_nn import Parameter
+from . import init
+from .module import Module
+
+
+class Linear(Module):
+    r"""Applies a linear transformation to the input. For instance, if input
+    is x, then output y is:
+
+    .. math::
+
+        y = xW^T + b
+
+    where :math:`y_i = \sum_j W_{ij} x_j + b_i`
+
+    :param in_features: size of each input sample.
+    :param out_features: size of each output sample.
+    :param bias: If set to ``False``, the layer will not learn an additive bias.
+        Default: ``True``
+
+    """
+
+    def __init__(
+        self, in_features: int, out_features: int, bias: bool = True, **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.out_features = out_features
+        self.in_features = in_features
+        w_shape = (out_features, in_features)
+        self.weight = Parameter(np.zeros(w_shape, dtype=np.float32))
+        self.bias = None
+        if bias:
+            b_shape = (out_features,)
+            self.bias = Parameter(np.zeros(b_shape, dtype=np.float32))
+        self.reset_parameters()
+
+    def _get_fanin(self):
+        return self.in_features
+
+    def reset_parameters(self) -> None:
+        fanin = self._get_fanin()
+        std = np.sqrt(1 / fanin)
+        init.normal_(self.weight, 0.0, std)
+        if self.bias is not None:
+            init.zeros_(self.bias)
+
+    def _calc_linear(self, x, weight, bias):
+        return linear(x, weight, bias)
+
+    def forward(self, x):
+        return self._calc_linear(x, self.weight, self.bias)
diff --git a/imperative/python/megengine/module/module.py b/imperative/python/megengine/module/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..723a9fbbb22d444d857eebfead206741295241a6
--- /dev/null
+++ b/imperative/python/megengine/module/module.py
@@ -0,0 +1,508 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from abc import ABCMeta, abstractmethod
+from collections import OrderedDict
+from typing import Any, Callable, Iterable, Optional, Set, Tuple, Union
+
+import numpy as np
+
+from ..core.tensor.dtype import is_quantize
+from ..logger import get_logger
+from ..tensor import Tensor
+from ..tensor_nn import Buffer, Parameter
+from ..utils.hook import HookHandler
+
+logger = get_logger(__name__)
+
+
+def _expand_structure(key, obj):
+    if isinstance(obj, (Tensor, Module)):
+        return [(key, obj)]
+    elif isinstance(obj, (list, tuple, dict)):
+        ret = []
+        if isinstance(obj, dict):
+            targets = ((k, obj[k]) for k in sorted(obj))
+        else:
+            targets = ((str(k), v) for k, v in enumerate(obj))
+        for k, o in targets:
+            sub_ret = _expand_structure(k, o)
+            if sub_ret and not isinstance(k, str):
+                raise AssertionError(
+                    "keys for Tensor and Module must be str, error key: {}".format(k)
+                )
+            for kt, vt in sub_ret:
+                ret.extend([(key + "." + kt, vt)])
+        return ret
+    else:
+        return []
+
+
+def _is_parameter(obj):
+    return isinstance(obj, Parameter)
+
+
+def _is_buffer(obj):
+    return isinstance(obj, Buffer)
+
+
+def _is_module(obj):
+    return isinstance(obj, Module)
+
+
+class Module(metaclass=ABCMeta):
+    """Base Module class.
+    """
+
+    def __init__(self):
+        # runtime attributes
+        self.training = True
+        self.quantize_disabled = False
+
+        # hooks
+        self._forward_pre_hooks = OrderedDict()
+        self._forward_hooks = OrderedDict()
+
+    @abstractmethod
+    def forward(self, inputs):
+        pass
+
+    def register_forward_pre_hook(self, hook: Callable) -> HookHandler:
+        """Register a hook to handle forward inputs. `hook` should be a function.
+
+        Note that the hook only receives the positional `inputs`; keyword arguments
+        passed to :meth:`__call__` are not forwarded to it.
+
+        :param hook: a function that receives `module` and `inputs`, then returns
+            a modified `inputs` or `None`.
+        :return: a handler with :meth:`~.HookHandler.remove` interface to delete the hook.
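+
+        A usage sketch (``net`` stands for any :class:`Module` instance):
+
+        .. code-block::
+
+            def print_inputs(module, inputs):
+                print("pre-forward inputs:", len(inputs))
+                return None  # returning None keeps the inputs unchanged
+
+            handler = net.register_forward_pre_hook(print_inputs)
+            # ... run forward passes ...
+            handler.remove()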
+        """
+        return HookHandler(self._forward_pre_hooks, hook)
+
+    def register_forward_hook(self, hook: Callable) -> HookHandler:
+        """Register a hook to handle forward results. `hook` should be a function that
+        receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`.
+
+        This method returns a handler with :meth:`~.HookHandler.remove` interface to delete the hook.
+        """
+        return HookHandler(self._forward_hooks, hook)
+
+    def __call__(self, *inputs, **kwargs):
+        for hook in self._forward_pre_hooks.values():
+            modified_inputs = hook(self, inputs)
+            if modified_inputs is not None:
+                if not isinstance(modified_inputs, tuple):
+                    modified_inputs = (modified_inputs,)
+                inputs = modified_inputs
+
+        outputs = self.forward(*inputs, **kwargs)
+
+        for hook in self._forward_hooks.values():
+            modified_outputs = hook(self, inputs, outputs)
+            if modified_outputs is not None:
+                outputs = modified_outputs
+        return outputs
+
+    def _flatten(
+        self,
+        *,
+        recursive: bool = True,
+        with_key: bool = False,
+        with_parent: bool = False,
+        prefix: Optional[str] = None,
+        predicate: Callable[[Any], bool] = lambda _: True,
+        seen: Optional[Set[int]] = None
+    ) -> Union[Iterable[Any], Iterable[Tuple[str, Any]]]:
+        """Scans the module object and returns an iterable for the :class:`~.Tensor`
+        and :class:`~.Module` attributes that agree with the ``predicate``. For multiple
+        calls of this function with same arguments, the order of objects within the
+        returned iterable is guaranteed to be identical, as long as all the involved
+        module objects' ``__dict__`` does not change throughout those calls.
+
+        :param recursive: Whether to recursively scan all the submodules.
+        :param with_key: Whether to yield keys along with yielded objects.
+        :param with_parent: Whether to yield ``self`` along with yielded objects.
+        :param prefix: The prefix appended to the yielded keys.
+        :param predicate: The predicate function applied to scanned objects.
+        :param seen: A set of ids of already-visited objects, used to skip duplicates.
+        """
+        if seen is None:
+            seen = set([id(self)])
+
+        module_dict = vars(self)
+        _prefix = "" if prefix is None else prefix + "."
+
+        for key in sorted(module_dict):
+            for expanded_key, leaf in _expand_structure(key, module_dict[key]):
+                leaf_id = id(leaf)
+                if leaf_id in seen:
+                    continue
+                seen.add(leaf_id)
+
+                if predicate(leaf):
+                    if with_key and with_parent:
+                        yield _prefix + expanded_key, leaf, self
+                    elif with_key:
+                        yield _prefix + expanded_key, leaf
+                    elif with_parent:
+                        yield leaf, self
+                    else:
+                        yield leaf
+
+                if recursive and isinstance(leaf, Module):
+                    yield from leaf._flatten(
+                        recursive=recursive,
+                        with_key=with_key,
+                        with_parent=with_parent,
+                        prefix=_prefix + expanded_key if with_key else None,
+                        predicate=predicate,
+                        seen=seen,
+                    )
+
+    def parameters(
+        self, requires_grad: Optional[bool] = None, recursive: bool = True, **kwargs
+    ) -> Iterable[Parameter]:
+        r"""Returns an iterable for the :class:`~.Parameter` of the module.
+
+        :param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad`
+            attribute of returned :class:`.Parameter`. ``None`` for no limitation.
+        :param recursive: If ``True``, returns all :class:`~.Parameter` within this
+            module, else only returns :class:`~.Parameter` that are direct attributes
+            of this module.
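+
+        For example, to collect only the trainable parameters (``net`` is a
+        placeholder for any module):
+
+        .. code-block::
+
+            trainable = list(net.parameters(requires_grad=True))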
+ """ + + def predicate(obj) -> bool: + return _is_parameter(obj) and ( + requires_grad is None or obj.requires_grad == requires_grad + ) + + yield from self._flatten( + with_key=False, predicate=predicate, recursive=recursive, **kwargs + ) + + def named_parameters( + self, + requires_grad: Optional[bool] = None, + prefix: Optional[str] = None, + recursive: bool = True, + **kwargs + ) -> Iterable[Tuple[str, Parameter]]: + """Returns an iterable for key :class:`~.Parameter` pairs of the module, where + ``key`` is the dotted path from this module to the :class:`~.Parameter` . + + :param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad` + attribute of returned :class:`~.Parameter` . ``None`` for no limitation. + :param prefix: The prefix prepended to the keys. + :param recursive: If ``True``, returns all :class:`~.Parameter` within this + module, else only returns :class:`~.Parameter` that are direct attributes + of this module. + """ + + def predicate(obj) -> bool: + return _is_parameter(obj) and ( + requires_grad is None or obj.requires_grad == requires_grad + ) + + yield from self._flatten( + with_key=True, + prefix=prefix, + predicate=predicate, + recursive=recursive, + **kwargs, + ) + + def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Buffer]: + """Returns an iterable for the :class:`~.Buffer` of the module. + + :param recursive: If ``True``, returns all :class:`~.Buffer` within this + module, else only returns :class:`~.Buffer` that are direct attributes + of this module. + """ + yield from self._flatten( + with_key=False, predicate=_is_buffer, recursive=recursive, **kwargs + ) + + def named_buffers( + self, prefix: Optional[str] = None, recursive: bool = True, **kwargs + ) -> Iterable[Tuple[str, Buffer]]: + """Returns an iterable for key :class:`~.Buffer` pairs of the module, where + ``key`` is the dotted path from this module to the :class:`~.Buffer` . + + :param prefix: The prefix prepended to the keys. + :param recursive: If ``True``, returns all :class:`~.Buffer` within this + module, else only returns :class:`~.Buffer` that are direct attributes + of this module. + """ + yield from self._flatten( + with_key=True, + prefix=prefix, + predicate=_is_buffer, + recursive=recursive, + **kwargs, + ) + + def children(self, **kwargs) -> "Iterable[Module]": + """Returns an iterable for all the submodules that are direct attributes of this + module. + """ + yield from self._flatten( + with_key=False, predicate=_is_module, recursive=False, **kwargs + ) + + def named_children(self, **kwargs) -> "Iterable[Tuple[str, Module]]": + """Returns an iterable of key-submodule pairs for all the submodules that are + direct attributes of this module, where 'key' is the attribute name of + submodules. + """ + yield from self._flatten( + with_key=True, predicate=_is_module, recursive=False, **kwargs + ) + + def modules(self, **kwargs) -> "Iterable[Module]": + """Returns an iterable for all the modules within this module, including itself. + """ + if "with_parent" in kwargs and kwargs["with_parent"]: + yield self, None + else: + yield self + yield from self._flatten(with_key=False, predicate=_is_module, **kwargs) + + def named_modules( + self, prefix: Optional[str] = None, **kwargs + ) -> "Iterable[Tuple[str, Module]]": + """Returns an iterable of key-module pairs for all the modules within this + module, including itself, where 'key' is the dotted path from this module to the + submodules. + + :param prefix: The prefix prepended to the path. 
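+
+        A small sketch of walking the module tree (``net`` is a placeholder):
+
+        .. code-block::
+
+            for name, m in net.named_modules():
+                print(name, type(m).__name__)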
+        """
+        if "with_parent" in kwargs and kwargs["with_parent"]:
+            yield ("" if prefix is None else prefix), self, None
+        else:
+            yield ("" if prefix is None else prefix), self
+        yield from self._flatten(
+            with_key=True, prefix=prefix, predicate=_is_module, **kwargs
+        )
+
+    def apply(self, fn: "Callable[[Module], Any]") -> None:
+        """Apply function ``fn`` to all the modules within this module, including
+        itself.
+
+        :param fn: The function to be applied on modules.
+        """
+        for it in self.modules():
+            fn(it)
+
+    def zero_grad(self) -> None:
+        """Set all parameters' grads to zero.
+        """
+        for param in self.parameters():
+            if param.grad is not None:
+                param.grad.reset_zero()
+
+    def train(self, mode: bool = True, recursive: bool = True) -> None:
+        """Set training mode of all the modules within this module (including itself) to
+        ``mode``. This effectively sets the ``training`` attributes of those modules
+        to ``mode``, but only has effect on certain modules (e.g.
+        :class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`)
+
+        :param mode: the training mode to be set on modules.
+        :param recursive: whether to recursively call submodules' ``train()``.
+        """
+        if not recursive:
+            self.training = mode
+            return
+
+        def fn(module: Module) -> None:
+            module.train(mode, recursive=False)
+
+        self.apply(fn)
+
+    def eval(self) -> None:
+        """Set training mode of all the modules within this module (including itself) to
+        ``False``. See :meth:`~.Module.train` for details.
+        """
+        self.train(False)
+
+    def disable_quantize(self, value=True):
+        r"""
+        Set the ``quantize_disabled`` attribute to ``value`` for this module and
+        all its submodules.
+        """
+
+        def fn(module: Module) -> None:
+            module.quantize_disabled = value
+
+        self.apply(fn)
+
+    def replace_param(
+        self, params: dict, start_pos: int, seen: Optional[Set[int]] = None
+    ):
+        """Replace module's parameters with `params`, used by :class:`~.ParamPack` to
+        speed up multi-machine training.
+        """
+        offset = 0
+        if seen is None:
+            seen = set([id(self)])
+        module_dict = vars(self)
+        for key in sorted(module_dict):
+            hash_id = id(module_dict[key])
+            if hash_id in seen:
+                continue
+            seen.add(hash_id)
+            if isinstance(module_dict[key], Parameter):
+                if start_pos + offset in params:
+                    assert module_dict[key].shape == params[start_pos + offset].shape
+                    module_dict[key] = params[start_pos + offset]
+                offset += 1
+            if isinstance(module_dict[key], Module):
+                offset += module_dict[key].replace_param(
+                    params, start_pos + offset, seen
+                )
+        return offset
+
+    def state_dict(self, rst=None, prefix="", keep_var=False):
+        r"""Returns a dictionary containing the whole state of the module.
+        """
+
+        def is_state(obj):
+            return _is_parameter(obj) or _is_buffer(obj)
+
+        if rst is None:
+            rst = OrderedDict()
+
+        for k, v in self._flatten(recursive=False, with_key=True, predicate=is_state):
+            assert prefix + k not in rst, "duplicated state: {}".format(k)
+            if keep_var:
+                rst[prefix + k] = v
+            else:
+                rst[prefix + k] = v.numpy()
+
+        for k, submodule in self._flatten(
+            recursive=False,
+            with_key=True,
+            predicate=lambda obj: isinstance(obj, Module),
+        ):
+            submodule.state_dict(rst, prefix + k + ".", keep_var)
+
+        return rst
+
+    def load_state_dict(
+        self,
+        state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]],
+        strict=True,
+    ):
+        r"""Load a given dictionary created by :func:`state_dict` into this module.
+        If ``strict`` is ``True``, the keys in the given ``state_dict`` must exactly
+        match the keys returned by this module's :func:`state_dict`.
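+
+        A typical save/load round trip looks like this (a sketch; the checkpoint
+        path is a placeholder):
+
+        .. code-block::
+
+            import megengine as mge
+
+            mge.save(net.state_dict(), "checkpoint.pkl")
+            net.load_state_dict(mge.load("checkpoint.pkl"))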
+
+        Users can also pass a closure: `Function[key: str, var: Tensor] -> Optional[np.ndarray]`
+        as a `state_dict`, in order to handle complex situations. For example, load everything
+        except for the final linear classifier:
+
+        .. code-block::
+
+            state_dict = {...}  # Dict[str, np.ndarray]
+            model.load_state_dict({
+                k: None if k.startswith('fc') else v
+                for k, v in state_dict.items()
+            }, strict=False)
+
+        Here returning `None` means skipping parameter `k`.
+
+        To prevent shape mismatch (e.g. when loading PyTorch weights), we can reshape before loading:
+
+        .. code-block::
+
+            state_dict = {...}
+            def reshape_accordingly(k, v):
+                return state_dict[k].reshape(v.shape)
+            model.load_state_dict(reshape_accordingly)
+
+        We can also perform inplace re-initialization or pruning:
+
+        .. code-block::
+
+            def reinit_and_pruning(k, v):
+                if 'bias' in k:
+                    M.init.zeros_(v)
+                if 'conv' in k:
+                    return v.numpy() * (np.abs(v.numpy()) > 1e-3).astype("float32")
+            model.load_state_dict(reinit_and_pruning, strict=False)
+        """
+        unused = []
+        if isinstance(state_dict, dict):
+            unused = state_dict.keys()
+
+            def closure(k, _):  # var unused
+                return state_dict[k] if k in state_dict else None
+
+        elif callable(state_dict):
+            closure = state_dict
+        else:
+            raise ValueError(
+                "`state_dict` must be a dict or callable, got {}".format(
+                    type(state_dict)
+                )
+            )
+
+        loaded, skipped = self._load_state_dict_with_closure(closure)
+        unused = set(unused) - loaded
+
+        if len(unused) != 0:
+            if strict:
+                raise KeyError(
+                    "Unused params violate `strict=True`, unused={}".format(unused)
+                )
+            else:
+                logger.warning(
+                    "Unused params in `strict=False` mode, unused={}".format(unused)
+                )
+
+        if len(skipped) != 0:
+            if strict:
+                raise KeyError(
+                    "Missing params violate `strict=True`, missing={}".format(skipped)
+                )
+            else:
+                logger.warning(
+                    "Missing params in `strict=False` mode, missing={}".format(skipped)
+                )
+
+    def _load_state_dict_with_closure(self, closure):
+        """Advanced state_dict loading through a callable `closure` whose signature is
+
+        `closure(key: str, var: Tensor) -> Union[np.ndarray, None]`
+        """
+        assert callable(closure), "closure must be a function"
+
+        loaded = []
+        skipped = []
+
+        local_state_dict = self.state_dict(keep_var=True)
+        for k, var in local_state_dict.items():
+            to_be_load = closure(k, var)
+            if to_be_load is None:
+                skipped.append(k)
+                continue
+            assert isinstance(
+                to_be_load, np.ndarray
+            ), "closure should return a `np.ndarray`, but for `{}` got {}".format(
+                k, to_be_load
+            )
+            assert (
+                var.shape == to_be_load.shape
+            ), "param `{}` shape mismatch, should be {}, got {}".format(
+                k, var.shape, to_be_load.shape
+            )
+            # For quantized dtypes, the initialized dtype's scale/zero_points
+            # may be invalid; use the pretrained dtype instead.
+            if is_quantize(to_be_load.dtype) and is_quantize(var.dtype):
+                var = var.astype(to_be_load.dtype)
+            var.set_value(to_be_load)
+            loaded.append(k)
+
+        return set(loaded), set(skipped)
diff --git a/imperative/python/megengine/module/parampack.py b/imperative/python/megengine/module/parampack.py
new file mode 100644
index 0000000000000000000000000000000000000000..feb14c613aa5198c1f1d9bc627302fb3b868f914
--- /dev/null
+++ b/imperative/python/megengine/module/parampack.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections
+from typing import Callable, Iterable, Optional, Tuple
+
+import numpy as np
+
+# NOTE: `param_pack_split` is used by ParamPack.forward below and was previously
+# undefined in this file; it is assumed to be exported by the functional package.
+from ..functional import param_pack_split
+from ..tensor_nn import Parameter, Tensor
+from .module import Module
+
+
+class ParamPack(Module):
+    r"""Pack a module's parameters by gathering their memory into contiguous addresses.
+    Parameters sharing the same (device, dtype, requires_grad) key, for example
+    ('gpu0', float32, True), will be packed together.
+    This speeds up gradient allreduce considerably in multi-machine training.
+
+    :param model: the module whose parameters will be packed.
+    :param nr_ignore_first: how many parameters will be left unpacked at first.
+    :param max_size_per_group: upper bound of packed parameters' size in MB.
+    :param max_nr_params_per_group: upper bound of the number of parameters of each group.
+
+    """
+
+    def __init__(
+        self,
+        model: Module,
+        nr_ignore_first: int = 8,
+        max_size_per_group: int = 10,
+        max_nr_params_per_group: int = 100,
+        group_func: Callable = lambda name, param: 0,
+    ):
+        super().__init__()
+        self._model = model
+        self._nr_ignore_first = nr_ignore_first
+        self._max_size_per_group = max_size_per_group
+        self._max_nr_params_per_group = max_nr_params_per_group
+        self._group_func = group_func
+        self._grouped_params = []
+        self._packed_params = []
+
+        params = model.named_parameters()
+        self._pack_params(params)
+
+    def parameters(self, requires_grad: Optional[bool] = None) -> Iterable[Parameter]:
+        for param in self._packed_params:
+            if requires_grad is None or param.requires_grad == requires_grad:
+                yield param
+
+    def named_parameters(
+        self, requires_grad: Optional[bool] = None
+    ) -> Iterable[Tuple[str, Parameter]]:
+        for idx, param in enumerate(self._packed_params):
+            if requires_grad is None or param.requires_grad == requires_grad:
+                yield "packed_param_" + str(idx), param
+
+    def _pack_params(self, params: Iterable[Tuple[str, Parameter]]):
+        groups = collections.defaultdict(list)
+        ignored = 0
+        param_id = 0
+        for name, param in params:
+            if self._nr_ignore_first > ignored:
+                ignored += 1
+                self._grouped_params.append([{"shape": param.shape, "id": param_id}])
+                param.pack_group_key = self._group_func(name, param)
+                self._packed_params.append(param)
+            else:
+                key = (
+                    param.dtype,
+                    param.device,
+                    param.requires_grad,
+                    self._group_func(name, param),
+                )
+                groups[key].append({"tensor": param, "id": param_id})
+            param_id += 1
+        for (dtype, device, requires_grad, group_key) in groups.keys():
+            dtype_sz = np.dtype(dtype).itemsize
+            align = device.mem_align
+            if align < dtype_sz:
+                align = 1
+            else:
+                assert align % dtype_sz == 0
+                align //= dtype_sz
+
+            group = groups[(dtype, device, requires_grad, group_key)]
+            while group:
+                aligned_pos = []
+                offset = 0
+                params = []
+                idx = 0
+                while idx < len(group):
+                    param = group[idx]
+                    assert param["tensor"].device == device
+                    padding = (align - (offset & (align - 1))) & (align - 1)
+                    offset += padding
+                    aligned_pos.append(offset)
+                    params.append(param)
+                    offset += int(np.prod(param["tensor"].shape))
+                    idx += 1
+
+                    if (
+                        offset * dtype_sz >= self._max_size_per_group * 1024 * 1024
+                        or idx >= self._max_nr_params_per_group
+                    ):
+                        break
+                group = group[idx:]
+                if idx == 1:
+                    # ignore param packs with only one item
+                    params[0]["tensor"].pack_group_key = group_key
+                    self._packed_params.append(params[0]["tensor"])
+                    self._grouped_params.append(
+                        [{"shape": params[0]["tensor"].shape, "id": params[0]["id"]}]
+                    )
+                    continue
+
+                packed_value = np.zeros((offset,), dtype=dtype)
+                for param, pos in zip(params, aligned_pos):
+                    val = param["tensor"].numpy()
+                    packed_value[pos : pos + val.size] = val.flatten()
+                new_param = Parameter(
+                    value=packed_value,
+                    device=device,
+                    dtype=dtype,
+                    requires_grad=requires_grad,
+                )
+                new_param.pack_group_key = group_key
+                self._packed_params.append(new_param)
+                self._grouped_params.append(
+                    [{"shape": i["tensor"].shape, "id": i["id"]} for i in params]
+                )
+
+    def forward(self, *args, **kwargs):
+        replace_param = dict()
+        for i in range(len(self._packed_params)):
+            packed_param = self._packed_params[i]
+            grouped_params = self._grouped_params[i]
+            if len(grouped_params) == 1:
+                continue
+            split = param_pack_split(
+                packed_param._symvar, [i["shape"] for i in grouped_params]
+            )
+            split = [
+                Parameter(Tensor(i, requires_grad=packed_param.requires_grad))
+                for i in split
+            ]
+            for j in range(len(split)):
+                replace_param[grouped_params[j]["id"]] = split[j]
+        self._model.replace_param(replace_param, 0)
+
+        return self._model.forward(*args, **kwargs)
diff --git a/imperative/python/megengine/module/pooling.py b/imperative/python/megengine/module/pooling.py
new file mode 100644
index 0000000000000000000000000000000000000000..8126ddc1f7ecd169efdcfdf620691da7f0f67140
--- /dev/null
+++ b/imperative/python/megengine/module/pooling.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from abc import abstractmethod
+from typing import Tuple, Union
+
+from ..functional import avg_pool2d, max_pool2d
+from .module import Module
+
+
+class _PoolNd(Module):
+    def __init__(
+        self,
+        kernel_size: Union[int, Tuple[int, int]],
+        stride: Union[int, Tuple[int, int]] = None,
+        padding: Union[int, Tuple[int, int]] = 0,
+    ):
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.stride = stride or kernel_size
+        self.padding = padding
+
+    @abstractmethod
+    def forward(self, inp):
+        pass
+
+
+class MaxPool2d(_PoolNd):
+    r"""Applies a 2D max pooling over an input.
+
+    For instance, given an input of the size :math:`(N, C, H, W)` and
+    :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of
+    the size :math:`(N, C, H_{out}, W_{out})` through a process described as:
+
+    .. math::
+        \begin{aligned}
+            out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1}
+                \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
+                \text{stride[1]} \times w + n)
+        \end{aligned}
+
+    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on
+    both sides for :attr:`padding` number of points.
+
+    :param kernel_size: the size of the window to take a max over.
+    :param stride: the stride of the window. Default value is ``kernel_size``.
+    :param padding: implicit zero padding to be added on both sides.
+    """
+
+    def forward(self, inp):
+        return max_pool2d(inp, self.kernel_size, self.stride, self.padding)
+
+
+class AvgPool2d(_PoolNd):
+    r"""Applies a 2D average pooling over an input.
+ + For instance, given an input of the size :math:`(N, C, H, W)` and + :attr:`kernel_size` :math:`(kH, kW)`, this layer generates the output of + the size :math:`(N, C, H_{out}, W_{out})` through a process described as: + + .. math:: + + out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} + input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) + + If :attr:`padding` is non-zero, then the input is implicitly zero-padded on + both sides for :attr:`padding` number of points. + + :param kernel_size: the size of the window. + :param stride: the stride of the window. Default value is ``kernel_size``. + :param padding: implicit zero padding to be added on both sides. + """ + + def forward(self, inp): + return avg_pool2d(inp, self.kernel_size, self.stride, self.padding) diff --git a/imperative/python/megengine/module/qat/__init__.py b/imperative/python/megengine/module/qat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b6adab4dc687a322fba6dd5652bdf8975933ad3a --- /dev/null +++ b/imperative/python/megengine/module/qat/__init__.py @@ -0,0 +1,14 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .concat import Concat +from .conv import Conv2d, ConvRelu2d +from .conv_bn import ConvBn2d, ConvBnRelu2d +from .elemwise import Elemwise +from .linear import Linear +from .module import QATModule +from .quant_dequant import DequantStub, QuantStub diff --git a/imperative/python/megengine/module/qat/concat.py b/imperative/python/megengine/module/qat/concat.py new file mode 100644 index 0000000000000000000000000000000000000000..a1f018938ee9fab92159c68a9e3fe7ddf5f5d3cc --- /dev/null +++ b/imperative/python/megengine/module/qat/concat.py @@ -0,0 +1,30 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable + +from ...tensor import Tensor +from .. import concat as Float +from .module import QATModule + + +class Concat(Float.Concat, QATModule): + r""" + A :class:`~.QATModule` to do functional concat with QAT support. + Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. + """ + + def forward(self, inps: Iterable[Tensor], axis: int = 0): + return self.apply_quant_activation(super().forward(inps, axis)) + + @classmethod + def from_float_module(cls, float_module): + r""" + Return a :class:`~.QATModule` instance converted from + a float :class:`~.Module` instance. + """ + return cls() diff --git a/imperative/python/megengine/module/qat/conv.py b/imperative/python/megengine/module/qat/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..315da839ed278d811fbb5895516140b5f3060129 --- /dev/null +++ b/imperative/python/megengine/module/qat/conv.py @@ -0,0 +1,59 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from ... import functional as F
+from ...quantization.utils import fake_quant_bias
+from .. import conv as Float
+from .module import QATModule
+
+
+class Conv2d(Float.Conv2d, QATModule):
+    r"""
+    A :class:`~.QATModule` Conv2d with QAT support.
+    Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
+    """
+
+    def calc_conv_qat(self, inp):
+        w_qat = self.apply_quant_weight(self.weight)
+        b_qat = fake_quant_bias(self.bias, inp, w_qat)
+        conv = self.calc_conv(inp, w_qat, b_qat)
+        return conv
+
+    @classmethod
+    def from_float_module(cls, float_module: Float.Conv2d):
+        r"""
+        Return a :class:`~.QATModule` instance converted from
+        a float :class:`~.Module` instance.
+        """
+        qat_module = cls(
+            float_module.in_channels,
+            float_module.out_channels,
+            float_module.kernel_size,
+            float_module.stride,
+            float_module.padding,
+            float_module.dilation,
+            float_module.groups,
+            float_module.bias is not None,
+            float_module.conv_mode.name,
+            float_module.compute_mode.name,
+        )
+        qat_module.weight = float_module.weight
+        qat_module.bias = float_module.bias
+        return qat_module
+
+    def forward(self, inp):
+        return self.apply_quant_activation(self.calc_conv_qat(inp))
+
+
+class ConvRelu2d(Conv2d):
+    r"""
+    A :class:`~.QATModule` including Conv2d and ReLU with QAT support.
+    Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
+    """
+
+    def forward(self, inp):
+        return self.apply_quant_activation(F.relu(self.calc_conv_qat(inp)))
diff --git a/imperative/python/megengine/module/qat/conv_bn.py b/imperative/python/megengine/module/qat/conv_bn.py
new file mode 100644
index 0000000000000000000000000000000000000000..baa0d769ca2034e80f77a1f890d52bdbac13ea48
--- /dev/null
+++ b/imperative/python/megengine/module/qat/conv_bn.py
@@ -0,0 +1,193 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from ...functional import add_update, ones, relu, sqrt, sum, zeros
+from ...quantization.utils import fake_quant_bias
+from .. import conv_bn as Float
+from .module import QATModule
+
+
+class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule):
+    def get_batch_mean_var(self, inp):
+        def _sum_channel(inp, axis=0, keepdims=True):
+            if isinstance(axis, int):
+                out = sum(inp, axis=axis, keepdims=keepdims)
+            elif isinstance(axis, tuple):
+                for idx, elem in enumerate(axis):
+                    out = sum(inp if idx == 0 else out, axis=elem, keepdims=keepdims)
+            return out
+
+        sum1 = _sum_channel(inp, (0, 2, 3))
+        sum2 = _sum_channel(inp ** 2, (0, 2, 3))
+        reduce_size = inp.size / inp.shape[1]
+        batch_mean = sum1 / reduce_size
+        batch_var = (sum2 - sum1 ** 2 / reduce_size) / reduce_size
+        return batch_mean, batch_var
+
+    def fold_weight_bias(self, bn_mean, bn_var):
+        # get folded conv params from bn:
+        # bn_istd = 1 / bn_std
+        # w_fold = gamma / bn_std * W
+        # b_fold = gamma * (b - bn_mean) / bn_std + beta
+        gamma = self.bn.weight
+        if gamma is None:
+            gamma = ones((self.bn.num_features), dtype="float32")
+        gamma = gamma.reshape(1, -1, 1, 1)
+        beta = self.bn.bias
+        if beta is None:
+            beta = zeros((self.bn.num_features), dtype="float32")
+        beta = beta.reshape(1, -1, 1, 1)
+
+        if bn_mean is None:
+            bn_mean = zeros((1, self.bn.num_features, 1, 1), dtype="float32")
+        if bn_var is None:
+            bn_var = ones((1, self.bn.num_features, 1, 1), dtype="float32")
+
+        conv_bias = self.conv.bias
+        if conv_bias is None:
+            conv_bias = zeros(self.conv._infer_bias_shape(), dtype="float32")
+
+        bn_istd = 1.0 / sqrt(bn_var + self.bn.eps)
+        # bn_istd = 1 / bn_std
+        # w_fold = gamma / bn_std * W
+        scale_factor = gamma * bn_istd
+        if self.conv.groups == 1:
+            w_fold = self.conv.weight * scale_factor.reshape(-1, 1, 1, 1)
+        else:
+            w_fold = self.conv.weight * scale_factor.reshape(
+                self.conv.groups, -1, 1, 1, 1
+            )
+
+        w_fold = self.apply_quant_weight(w_fold)
+        # b_fold = gamma * (b - bn_mean) / bn_std + beta
+        b_fold = beta + gamma * (conv_bias - bn_mean) * bn_istd
+        return w_fold, b_fold
+
+    def update_running_mean_and_running_var(
+        self, bn_mean, bn_var, num_elements_per_channel
+    ):
+        # update running mean and running var. no grad, use unbiased bn var
+        bn_mean = bn_mean.detach()
+        bn_var = (
+            bn_var.detach() * num_elements_per_channel / (num_elements_per_channel - 1)
+        )
+        exponential_average_factor = 1 - self.bn.momentum
+        add_update(
+            self.bn.running_mean,
+            delta=bn_mean,
+            alpha=1 - exponential_average_factor,
+            beta=exponential_average_factor,
+        )
+        add_update(
+            self.bn.running_var,
+            delta=bn_var,
+            alpha=1 - exponential_average_factor,
+            beta=exponential_average_factor,
+        )
+
+    def calc_conv_bn_qat(self, inp, approx=True):
+        if self.training and not approx:
+            conv = self.conv(inp)
+            bn_mean, bn_var = self.get_batch_mean_var(conv)
+            num_elements_per_channel = conv.size / conv.shape[1]
+            self.update_running_mean_and_running_var(
+                bn_mean, bn_var, num_elements_per_channel
+            )
+        else:
+            bn_mean, bn_var = self.bn.running_mean, self.bn.running_var
+
+        # get gamma and beta in BatchNorm
+        gamma = self.bn.weight
+        if gamma is None:
+            gamma = ones((self.bn.num_features), dtype="float32")
+        gamma = gamma.reshape(1, -1, 1, 1)
+        beta = self.bn.bias
+        if beta is None:
+            beta = zeros((self.bn.num_features), dtype="float32")
+        beta = beta.reshape(1, -1, 1, 1)
+        # conv_bias
+        conv_bias = self.conv.bias
+        if conv_bias is None:
+            conv_bias = zeros(self.conv._infer_bias_shape(), dtype="float32")
+
+        bn_istd = 1.0 / sqrt(bn_var + self.bn.eps)
+        # bn_istd = 1 / bn_std
+        # w_fold = gamma / bn_std * W
+        scale_factor = gamma * bn_istd
+        if self.conv.groups == 1:
+            w_fold = self.conv.weight * scale_factor.reshape(-1, 1, 1, 1)
+        else:
+            w_fold = self.conv.weight * scale_factor.reshape(
+                self.conv.groups, -1, 1, 1, 1
+            )
+        b_fold = None
+        if not (self.training and approx):
+            # b_fold = gamma * (conv_bias - bn_mean) / bn_std + beta
+            b_fold = beta + gamma * (conv_bias - bn_mean) * bn_istd
+
+        w_qat = self.apply_quant_weight(w_fold)
+        b_qat = fake_quant_bias(b_fold, inp, w_qat)
+        conv = self.conv.calc_conv(inp, w_qat, b_qat)
+        if not (self.training and approx):
+            return conv
+
+        # rescale conv to get original conv output
+        orig_conv = conv / scale_factor.reshape(1, -1, 1, 1)
+        if self.conv.bias is not None:
+            orig_conv = orig_conv + self.conv.bias
+        # calculate batch norm
+        bn_mean, bn_var = self.get_batch_mean_var(orig_conv)
+        bn_istd = 1.0 / sqrt(bn_var + self.bn.eps)
+        conv = gamma * bn_istd * (orig_conv - bn_mean) + beta
+        num_elements_per_channel = conv.size / conv.shape[1]
+        self.update_running_mean_and_running_var(
+            bn_mean, bn_var, num_elements_per_channel
+        )
+        return conv
+
+    @classmethod
+    def from_float_module(cls, float_module: Float._ConvBnActivation2d):
+        r"""
+        Return a :class:`~.QATModule` instance converted from
+        a float :class:`~.Module` instance.
+        """
+        qat_module = cls(
+            float_module.conv.in_channels,
+            float_module.conv.out_channels,
+            float_module.conv.kernel_size,
+            float_module.conv.stride,
+            float_module.conv.padding,
+            float_module.conv.dilation,
+            float_module.conv.groups,
+            float_module.conv.bias is not None,
+            float_module.conv.conv_mode.name,
+            float_module.conv.compute_mode.name,
+        )
+        qat_module.conv.weight = float_module.conv.weight
+        qat_module.conv.bias = float_module.conv.bias
+        qat_module.bn = float_module.bn
+        return qat_module
+
+
+class ConvBn2d(_ConvBnActivation2d):
+    r"""
+    A fused :class:`~.QATModule` including Conv2d and BatchNorm2d with QAT support.
+    Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
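+
+    Normally obtained by converting a float net rather than being constructed
+    directly, e.g. (a sketch; ``float_net`` is a placeholder and the import
+    path follows the :func:`~.quantize.quantize_qat` reference above):
+
+    .. code-block::
+
+        from megengine.quantization.quantize import quantize_qat
+
+        qat_net = quantize_qat(float_net)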
+ """ + + def forward(self, inp): + return self.apply_quant_activation(self.calc_conv_bn_qat(inp)) + + +class ConvBnRelu2d(_ConvBnActivation2d): + r""" + A fused :class:`~.QATModule` including Conv2d, BatchNorm2d and relu with QAT support. + Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. + """ + + def forward(self, inp): + return self.apply_quant_activation(relu(self.calc_conv_bn_qat(inp))) diff --git a/imperative/python/megengine/module/qat/elemwise.py b/imperative/python/megengine/module/qat/elemwise.py new file mode 100644 index 0000000000000000000000000000000000000000..f99583bdeaf8d9a4739088920a99bb8ab7973e29 --- /dev/null +++ b/imperative/python/megengine/module/qat/elemwise.py @@ -0,0 +1,31 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .. import elemwise as Float +from .module import QATModule + + +class Elemwise(Float.Elemwise, QATModule): + r""" + A :class:`~.QATModule` to do elemwise operator with QAT support. + Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. + + :param method: the elemwise method, see :class:`~.module.elemwise.Elemwise` for detail. + """ + + with_weight = False + + def forward(self, *inps): + return self.apply_quant_activation(super().forward(*inps)) + + @classmethod + def from_float_module(cls, float_module: Float.Elemwise): + r""" + Return a :class:`~.QATModule` instance converted from + a float :class:`~.Module` instance. + """ + return cls(float_module.method.name) diff --git a/imperative/python/megengine/module/qat/linear.py b/imperative/python/megengine/module/qat/linear.py new file mode 100644 index 0000000000000000000000000000000000000000..4067d51c6386aeb601f78591f9f609f7495f5751 --- /dev/null +++ b/imperative/python/megengine/module/qat/linear.py @@ -0,0 +1,39 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from ...quantization.utils import fake_quant_bias +from .. import linear as Float +from .module import QATModule + + +class Linear(Float.Linear, QATModule): + r""" + A :class:`~.QATModule` version of :class:`~.module.linear.Linear`. + Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. + + :param in_features: size of each input sample. + :param out_features: size of each output sample. + :param bias: If set to ``False``, the layer will not learn an additive bias. + Default: ``True`` + + """ + + def forward(self, x): + w_qat = self.apply_quant_weight(self.weight) + b_qat = fake_quant_bias(self.bias, x, w_qat) + return self.apply_quant_activation(self._calc_linear(x, w_qat, b_qat)) + + @classmethod + def from_float_module(cls, float_module: Float.Linear): + r""" + Return a :class:`~.QATModule` instance converted from + a float :class:`~.Module` instance. 
+        """
+        qmod = cls(float_module.in_features, float_module.out_features)
+        qmod.weight = float_module.weight
+        qmod.bias = float_module.bias
+        return qmod
diff --git a/imperative/python/megengine/module/qat/module.py b/imperative/python/megengine/module/qat/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..544e04aff63fb6f73dedf832a6d186d09749bd15
--- /dev/null
+++ b/imperative/python/megengine/module/qat/module.py
@@ -0,0 +1,154 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from abc import abstractmethod
+
+from ...quantization import FakeQuantize, Observer, QConfig
+from ...tensor import Tensor
+from ..module import Module
+
+
+class QATModule(Module):
+    r"""
+    Base class of quantized-float related Module, basically for QAT and Calibration.
+
+    Use :meth:`~.QATModule.from_float_module` to generate an instance from a float
+    :class:`~.Module`. Or use :func:`~.quantize.quantize_qat` to do it recursively and
+    automatically.
+
+    It can be further converted to :class:`~.QuantizedModule` for deployment using
+    :func:`~.quantize.quantize`.
+    """
+
+    with_weight = True
+    with_act = True
+
+    def __init__(self):
+        super().__init__()
+
+        self.weight_observer = None  # type: Observer
+        self.act_observer = None  # type: Observer
+
+        self.weight_fake_quant = None  # type: FakeQuantize
+        self.act_fake_quant = None  # type: FakeQuantize
+
+    def set_qconfig(self, qconfig: QConfig):
+        r"""
+        Set quantization related configs with ``qconfig``, including
+        observer and fake_quant for weight and activation.
+        """
+
+        def safe_call(func):
+            return func() if func is not None else None
+
+        if self.with_act:
+            self.act_observer = safe_call(qconfig.act_observer)
+            self.act_fake_quant = safe_call(qconfig.act_fake_quant)
+        if self.with_weight:
+            self.weight_observer = safe_call(qconfig.weight_observer)
+            self.weight_fake_quant = safe_call(qconfig.weight_fake_quant)
+
+    def _enable_exec(self, with_module, func, enable):
+        if not with_module:
+            return
+        if enable:
+            func.enable()
+        else:
+            func.disable()
+
+    def set_fake_quant(self, enable):
+        self._enable_exec(self.with_act, self.act_fake_quant, enable)
+        self._enable_exec(self.with_weight, self.weight_fake_quant, enable)
+
+    def set_observer(self, enable):
+        self._enable_exec(self.with_act, self.act_observer, enable)
+        self._enable_exec(self.with_weight, self.weight_observer, enable)
+
+    def _apply_fakequant_with_observer(
+        self, target: Tensor, fake_quant: FakeQuantize, observer: Observer
+    ):
+        # do observer
+        if observer is None:
+            oup = target
+            q_dict = None
+        else:
+            oup = observer(target)
+            q_dict = observer.get_qparams()
+        # do fake quant
+        if fake_quant is not None:
+            oup = fake_quant(oup, q_dict)
+            # use qparams of fake_quant if available.
+            if hasattr(fake_quant, "get_qparams"):
+                q_dict = fake_quant.get_qparams()
+        # attach the qparams to the output tensor.
+        if q_dict is not None:
+            oup.q_dict.update(q_dict)
+        return oup
+
+    def apply_quant_weight(self, target: Tensor):
+        r"""
+        Apply weight's observer and fake_quant from ``qconfig`` on ``target``.
+        """
+        return self._apply_fakequant_with_observer(
+            target, self.weight_fake_quant, self.weight_observer
+        )
+
+    def apply_quant_activation(self, target: Tensor):
+        r"""
+        Apply activation's observer and fake_quant from ``qconfig`` on ``target``.
+        """
+        return self._apply_fakequant_with_observer(
+            target, self.act_fake_quant, self.act_observer
+        )
+
+    def _get_method_result(
+        self, method: str, fake_quant: FakeQuantize, observer: Observer
+    ):
+        if hasattr(fake_quant, method):
+            return getattr(fake_quant, method)()
+        elif hasattr(observer, method):
+            return getattr(observer, method)()
+        return None
+
+    def get_weight_dtype(self):
+        r"""
+        Get weight's quantization dtype as the method from ``qconfig``.
+        """
+        return self._get_method_result(
+            "get_dtype", self.weight_fake_quant, self.weight_observer
+        )
+
+    def get_activation_dtype(self):
+        r"""
+        Get activation's quantization dtype as the method from ``qconfig``.
+        """
+        return self._get_method_result(
+            "get_dtype", self.act_fake_quant, self.act_observer
+        )
+
+    def get_weight_qparams(self):
+        r"""
+        Get weight's quantization parameters.
+        """
+        return self._get_method_result(
+            "get_qparams", self.weight_fake_quant, self.weight_observer
+        )
+
+    def get_activation_qparams(self):
+        r"""
+        Get activation's quantization parameters.
+        """
+        return self._get_method_result(
+            "get_qparams", self.act_fake_quant, self.act_observer
+        )
+
+    @classmethod
+    @abstractmethod
+    def from_float_module(cls, float_module: Module):
+        r"""
+        Return a :class:`~.QATModule` instance converted from
+        a float :class:`~.Module` instance.
+        """
diff --git a/imperative/python/megengine/module/qat/quant_dequant.py b/imperative/python/megengine/module/qat/quant_dequant.py
new file mode 100644
index 0000000000000000000000000000000000000000..0baa3e1c7822085e520c01017d8219104905d6ec
--- /dev/null
+++ b/imperative/python/megengine/module/qat/quant_dequant.py
@@ -0,0 +1,50 @@
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from .. import quant_dequant as Float
+from .module import QATModule
+
+
+class QuantStub(Float.QuantStub, QATModule):
+    r"""
+    A helper :class:`~.QATModule` that simply returns input, but will quantize
+    input after being converted to :class:`~.QuantizedModule`.
+    """
+
+    with_weight = False
+
+    def forward(self, inp):
+        return self.apply_quant_activation(inp)
+
+    @classmethod
+    def from_float_module(cls, float_module: Float.QuantStub):
+        r"""
+        Return a :class:`~.QATModule` instance converted from
+        a float :class:`~.Module` instance.
+        """
+        return cls()
+
+
+class DequantStub(Float.DequantStub, QATModule):
+    r"""
+    A helper :class:`~.QATModule` that simply returns input, but will de-quantize
+    input after being converted to :class:`~.QuantizedModule`.
+    """
+
+    with_weight = False
+    with_act = False
+
+    def forward(self, inp):
+        return inp
+
+    @classmethod
+    def from_float_module(cls, float_module: Float.DequantStub):
+        r"""
+        Return a :class:`~.QATModule` instance converted from
+        a float :class:`~.Module` instance.
+ """ + return cls() diff --git a/imperative/python/megengine/module/quant_dequant.py b/imperative/python/megengine/module/quant_dequant.py new file mode 100644 index 0000000000000000000000000000000000000000..aaf2b0cc3ae333c34574dfc3e6284a70c99bc3eb --- /dev/null +++ b/imperative/python/megengine/module/quant_dequant.py @@ -0,0 +1,28 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .module import Module + + +class QuantStub(Module): + r""" + A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` + version :class:`~.qat.QuantStub` using :func:`~.quantize.quantize_qat`. + """ + + def forward(self, inp): + return inp + + +class DequantStub(Module): + r""" + A helper :class:`~.Module` simply returning input. Could be replaced with :class:`~.QATModule` + version :class:`~.qat.DequantStub` using :func:`~.quantize.quantize_qat`. + """ + + def forward(self, inp): + return inp diff --git a/imperative/python/megengine/module/quantized/__init__.py b/imperative/python/megengine/module/quantized/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e641476d6a363a609660fb2495bf946e91b7b6c8 --- /dev/null +++ b/imperative/python/megengine/module/quantized/__init__.py @@ -0,0 +1,14 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .concat import Concat +from .conv import Conv2d, ConvRelu2d +from .conv_bn import ConvBn2d, ConvBnRelu2d +from .elemwise import Elemwise +from .linear import Linear +from .module import QuantizedModule +from .quant_dequant import DequantStub, QuantStub diff --git a/imperative/python/megengine/module/quantized/concat.py b/imperative/python/megengine/module/quantized/concat.py new file mode 100644 index 0000000000000000000000000000000000000000..5815d7d9ee885cda3965beb4a5171590bcd1eb9b --- /dev/null +++ b/imperative/python/megengine/module/quantized/concat.py @@ -0,0 +1,35 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable + +from ... import functional as F +from ...tensor import Tensor +from ..qat import concat as QAT +from .module import QuantizedModule + + +class Concat(QuantizedModule): + r""" + A :class:`~.QuantizedModule` to do quantized concat, inference only. 
+ """ + + def __init__(self, dtype=None): + super().__init__() + self.output_dtype = dtype + + def forward(self, inps: Iterable[Tensor], axis: int = 0): + new_inps = (x.astype(self.output_dtype) for x in inps) + return F.concat(new_inps, axis) + + @classmethod + def from_qat_module(cls, qat_module: QAT.Concat): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. + """ + return cls(qat_module.get_activation_dtype()) diff --git a/imperative/python/megengine/module/quantized/conv.py b/imperative/python/megengine/module/quantized/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..696e4f63ec62577c726cc43934c43aa30b27e995 --- /dev/null +++ b/imperative/python/megengine/module/quantized/conv.py @@ -0,0 +1,107 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Tuple, Union + +import numpy as np + +from ... import module as Float +from ...core.tensor import dtype +from ...functional import conv_bias_activation +from ...tensor_nn import Parameter +from ..qat import conv as QAT +from .module import QuantizedModule + + +class Conv2d(Float.Conv2d, QuantizedModule): + r"""quantized version of :class:`~.qat.conv.Conv2d`.""" + r"""Applies a 2D convolution over an quantized input tensor, inference only. + + The parameter is same with :class: `~.Conv2d` + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + conv_mode: str = "CROSS_CORRELATION", + compute_mode: str = "DEFAULT", + dtype=None, + ): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + groups, + True, + conv_mode, + compute_mode, + ) + self.output_dtype = dtype + + def calc_conv_quantized(self, inp, nonlinear_mode="IDENTITY"): + inp_scale = dtype.get_scale(inp.dtype) + w_scale = dtype.get_scale(self.weight.dtype) + bias_scale = inp_scale * w_scale + return conv_bias_activation( + inp, + self.weight, + self.bias.astype(dtype.qint32(bias_scale)), + self.output_dtype, + self.stride, + self.padding, + self.dilation, + self.groups, + conv_mode=self.conv_mode, + compute_mode=self.compute_mode, + nonlinear_mode=nonlinear_mode, + ) + + @classmethod + def from_qat_module(cls, qat_module: QAT.Conv2d): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. 
+ """ + output_dtype = qat_module.get_activation_dtype() + qconv = cls( + qat_module.in_channels, + qat_module.out_channels, + qat_module.kernel_size, + qat_module.stride, + qat_module.padding, + qat_module.dilation, + qat_module.groups, + dtype=output_dtype, + ) + weight = qat_module.weight.astype(qat_module.get_weight_dtype()) + qconv.weight = Parameter(weight.numpy()) + if qat_module.bias is not None: + qconv.bias = Parameter(qat_module.bias.numpy()) + else: + qconv.bias = Parameter( + np.zeros(qat_module._infer_bias_shape(), dtype=np.float32) + ) + return qconv + + def forward(self, inp): + return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY") + + +class ConvRelu2d(Conv2d): + r"""quantized version of :class:`~.qat.conv.ConvRelu2d`.""" + + def forward(self, inp): + return self.calc_conv_quantized(inp, nonlinear_mode="RELU") diff --git a/imperative/python/megengine/module/quantized/conv_bn.py b/imperative/python/megengine/module/quantized/conv_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..e7c1de08aec800101d613a16637c89cf215da70d --- /dev/null +++ b/imperative/python/megengine/module/quantized/conv_bn.py @@ -0,0 +1,56 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from ...tensor_nn import Parameter +from ..qat import conv_bn as QAT +from .conv import Conv2d + + +class _ConvBnActivation2d(Conv2d): + r"""Applies a 2D convolution over an quantized input tensor, inference only. + + The parameter is same with :class: `~.Conv2d` + """ + + @classmethod + def from_qat_module(cls, qat_module: QAT._ConvBnActivation2d): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. + """ + output_dtype = qat_module.get_activation_dtype() + qconv = cls( + qat_module.conv.in_channels, + qat_module.conv.out_channels, + qat_module.conv.kernel_size, + qat_module.conv.stride, + qat_module.conv.padding, + qat_module.conv.dilation, + qat_module.conv.groups, + dtype=output_dtype, + ) + w_fold, b_fold = qat_module.fold_weight_bias( + qat_module.bn.running_mean, qat_module.bn.running_var + ) + weight = w_fold.astype(qat_module.get_weight_dtype()) + qconv.weight = Parameter(weight.numpy()) + qconv.bias = Parameter(b_fold.numpy()) + return qconv + + +class ConvBn2d(_ConvBnActivation2d): + r"""quantized version of :class:`~.qat.conv_bn.ConvBn2d`.""" + + def forward(self, inp): + return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY") + + +class ConvBnRelu2d(_ConvBnActivation2d): + r"""quantized version of :class:`~.qat.conv_bn.ConvBnRelu2d`.""" + + def forward(self, inp): + return self.calc_conv_quantized(inp, nonlinear_mode="RELU") diff --git a/imperative/python/megengine/module/quantized/elemwise.py b/imperative/python/megengine/module/quantized/elemwise.py new file mode 100644 index 0000000000000000000000000000000000000000..8caee62ed8ff04ffc1520967ccf6f61fb1be8448 --- /dev/null +++ b/imperative/python/megengine/module/quantized/elemwise.py @@ -0,0 +1,36 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from ...core.ops._internal import param_defs as P +from ...functional.elemwise import _elemwise_multi_type +from ...tensor import Tensor +from ..qat import elemwise as QAT +from .module import QuantizedModule + + +class Elemwise(QuantizedModule): + r"""quantized version of :class:`~.qat.elemwise.Elemwise`.""" + + _elemwise_multi_type_mode = P.ElemwiseMultiType.Mode + + def __init__(self, method, dtype=None): + super().__init__() + self.method = self._elemwise_multi_type_mode.convert("Q" + method) + self.output_dtype = dtype + + def forward(self, *inps): + if self.training: + raise ValueError("quantized module only support inference.") + return _elemwise_multi_type(*inps, mode=self.method, dtype=self.output_dtype) + + @classmethod + def from_qat_module(cls, qat_module: QAT.Elemwise): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. + """ + return cls(qat_module.method.name, qat_module.get_activation_dtype()) diff --git a/imperative/python/megengine/module/quantized/linear.py b/imperative/python/megengine/module/quantized/linear.py new file mode 100644 index 0000000000000000000000000000000000000000..e42fe266b9e051923a20c62ab22e2a5b07ebb18e --- /dev/null +++ b/imperative/python/megengine/module/quantized/linear.py @@ -0,0 +1,52 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +from ... import functional as F +from ...core.tensor import dtype +from ...tensor_nn import Parameter +from ..qat import linear as QAT +from .module import QuantizedModule + + +class Linear(QuantizedModule): + r"""quantized version of :class:`~.qat.linear.Linear`.""" + + def __init__( + self, dtype: np.dtype = None, + ): + super().__init__() + self.weight = None + self.bias = None + self.output_dtype = dtype + + def forward(self, inp): + if self.training: + raise ValueError("quantized module only support inference.") + inp_scale = dtype.get_scale(inp.dtype) + w_scale = dtype.get_scale(self.weight.dtype) + bias_dtype = dtype.qint32(inp_scale * w_scale) + return F.linear( + inp, + self.weight, + None if self.bias is None else self.bias.astype(bias_dtype), + ).astype(self.output_dtype) + + @classmethod + def from_qat_module(cls, qat_module: QAT.Linear): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. 
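 
Lowering a QAT elemwise op therefore needs only two pieces of information: the mode name, which gains a `Q` prefix when looked up in `ElemwiseMultiType.Mode` (for example `ADD` becomes `QADD`), and the calibrated output dtype. A sketch, with `qat_add` standing in for a calibrated `qat.Elemwise("ADD")`:

```python
from megengine.module.quantized import Elemwise

# qat_add: a megengine.module.qat.Elemwise("ADD") whose observers have already run.
q_add = Elemwise.from_qat_module(qat_add)   # mode -> Mode.QADD, dtype from the observer
out = q_add(x_q, y_q)                       # x_q, y_q: quantized input tensors
```
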
+ """ + output_dtype = qat_module.get_activation_dtype() + qmod = cls(dtype=output_dtype) + weight = qat_module.weight.astype(qat_module.get_weight_dtype()) + qmod.weight = Parameter(weight.numpy()) + if qat_module.bias is not None: + qmod.bias = Parameter(qat_module.bias.numpy()) + return qmod diff --git a/imperative/python/megengine/module/quantized/module.py b/imperative/python/megengine/module/quantized/module.py new file mode 100644 index 0000000000000000000000000000000000000000..4fccdbfa27b1b17a8de486bbb41213f3585556a5 --- /dev/null +++ b/imperative/python/megengine/module/quantized/module.py @@ -0,0 +1,31 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from abc import abstractmethod + +from ..module import Module +from ..qat import QATModule + + +class QuantizedModule(Module): + r""" + Base class of quantized Module, which should be converted from QATModule + and not support traning. + """ + + def __call__(self, *inputs, **kwargs): + if self.training: + raise ValueError("quantized module only support inference.") + return super().__call__(*inputs, **kwargs) + + @classmethod + @abstractmethod + def from_qat_module(cls, qat_module: QATModule): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. + """ diff --git a/imperative/python/megengine/module/quantized/quant_dequant.py b/imperative/python/megengine/module/quantized/quant_dequant.py new file mode 100644 index 0000000000000000000000000000000000000000..0c245011f80c1b509eb7490633d3bfc921254799 --- /dev/null +++ b/imperative/python/megengine/module/quantized/quant_dequant.py @@ -0,0 +1,49 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from ..qat import quant_dequant as QAT +from .module import QuantizedModule + + +class QuantStub(QuantizedModule): + r""" + quantized version of :class:`~.qat.quant_dequant.QuantStub`, + will convert input to quantized dtype. + """ + + def __init__(self, dtype=None): + super().__init__() + self.output_dtype = dtype + + def forward(self, inp): + return inp.astype(self.output_dtype) + + @classmethod + def from_qat_module(cls, qat_module: QAT.QuantStub): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. + """ + return cls(qat_module.get_activation_dtype()) + + +class DequantStub(QuantizedModule): + r""" + quantized version of :class:`~.qat.quant_dequant.DequantStub`, + will restore quantized input to float32 dtype. + """ + + def forward(self, inp): + return inp.astype("float32") + + @classmethod + def from_qat_module(cls, qat_module: QAT.DequantStub): + r""" + return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. 
+ """ + return cls() diff --git a/imperative/python/megengine/module/sequential.py b/imperative/python/megengine/module/sequential.py new file mode 100644 index 0000000000000000000000000000000000000000..03afd48a7e3f0b4012e2fd59e2b6bff4d66b602f --- /dev/null +++ b/imperative/python/megengine/module/sequential.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from collections import OrderedDict + +from .module import Module + + +class Sequential(Module): + r"""A sequential container. + Modules will be added to it in the order they are passed in the constructor. + Alternatively, an ordered dict of modules can also be passed in. + + To make it easier to understand, here is a small example: + + .. testcode:: + + import numpy as np + import megengine.nn as nn + import megengine.nn.functional as F + + batch_size = 64 + data = nn.Input("data", shape=(batch_size, 1, 28, 28), dtype=np.float32, value=np.zeros((batch_size, 1, 28, 28))) + label = nn.Input("label", shape=(batch_size,), dtype=np.int32, value=np.zeros(batch_size,)) + + data = data.reshape(batch_size, -1) + net = nn.Sequential( + nn.Linear(28 * 28, 320), + nn.Linear(320, 500), + nn.Linear(500, 320), + nn.Linear(320, 10) + ) + pred = net(data) + + loss = F.cross_entropy_with_softmax(pred, label) + + """ + + def __init__(self, *args): + super().__init__() + self.layer_keys = [] + self.layer_values = [] + if len(args) == 1 and isinstance(args[0], OrderedDict): + for key, module in args[0].items(): + # self.add_module(key, module) + setattr(self, key, module) + self.layer_keys.append(key) + self.layer_values.append(module) + else: + for idx, module in enumerate(args): + # self.add_module(str(idx), module) + setattr(self, str(idx), module) + self.layer_keys.append(str(idx)) + self.layer_values.append(module) + + def __getitem__(self, idx): + if isinstance(idx, slice): + return self.__class__( + OrderedDict(zip(self.layer_keys[idx], self.layer_values[idx])) + ) + else: + return self.layer_values[idx] + + def __setitem__(self, idx, module): + key = self.layer_keys[idx] + self.layer_values[idx] = module + return setattr(self, key, module) + + def __delitem__(self, idx): + if isinstance(idx, slice): + for key in self.layer_keys[idx]: + delattr(self, key) + del self.layer_keys[idx] + del self.layer_values[idx] + else: + delattr(self, self.layer_keys[idx]) + del self.layer_keys[idx] + del self.layer_values[idx] + + def __len__(self): + return len(self.layer_keys) + + def __iter__(self): + return iter(self.layer_values) + + def forward(self, inp): + for layer in self.layer_values: + inp = layer(inp) + return inp diff --git a/imperative/python/megengine/optimizer/__init__.py b/imperative/python/megengine/optimizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad783e0605e0308354d5e2ef3ba21327086f3938 --- /dev/null +++ b/imperative/python/megengine/optimizer/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from .adadelta import Adadelta +from .adagrad import Adagrad +from .adam import Adam +from .lr_scheduler import LRScheduler +from .multi_step_lr import MultiStepLR +from .optimizer import Optimizer +from .sgd import SGD diff --git a/imperative/python/megengine/optimizer/adadelta.py b/imperative/python/megengine/optimizer/adadelta.py new file mode 100644 index 0000000000000000000000000000000000000000..9de92fa9b8631f4b25d2e8b4e2293a3d1c292260 --- /dev/null +++ b/imperative/python/megengine/optimizer/adadelta.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable, Union + +import numpy as np + +from ..functional import sqrt +from ..tensor_nn import Buffer, Parameter +from .distributed_optimizer import DistributedOptimizer + + +class Adadelta(DistributedOptimizer): + r"""Implements Adadelta algorithm. + + It has been proposed in `"ADADELTA: An Adaptive Learning Rate Method" `_. + + :param params: iterable of parameters to optimize or dicts defining + parameter groups. + :param lr: coefficient that scale delta before it is applied + to the parameters (default: 1.0). + :param rho: coefficient used for computing a running average + of squared gradients (default: 0.9). + :param eps: term added to the denominator to improve + numerical stability (default: 1e-6). + :param weight_decay: weight decay (L2 penalty) (default: 0). + """ + + def __init__( + self, + params: Union[Iterable[Parameter], dict], + lr: float = 1.0, + rho: float = 0.9, + eps: float = 1e-6, + weight_decay: float = 0.0, + **kwargs + ): + assert lr >= 0.0, "Invalid learning rate: {}".format(lr) + assert rho >= 0.0 and rho <= 1.0, "Invalid rho value: {}".format(rho) + assert eps >= 0.0, "Invalid epsilon value: {}".format(eps) + assert weight_decay >= 0.0, "Invalid weight_decay value: {}".format( + weight_decay + ) + + defaults = dict(lr=lr, rho=rho, eps=eps, weight_decay=weight_decay) + super().__init__(params, defaults, **kwargs) + + def _create_state(self, param_group): + for param in param_group["params"]: + self._add_state(param, "square_avg") + self._add_state(param, "acc_delta") + self._add_state(param, "step", initializer=0.0) + + def _updates(self, param_group): + lr = param_group["lr"] + weight_decay = param_group["weight_decay"] + rho = param_group["rho"] + eps = param_group["eps"] + + for param in param_group["params"]: + + if param.__wrapped__ in self._grad_skip: + self._grad_skip.remove(param.__wrapped__) + continue + + if not isinstance(param.grad, Buffer): + raise TypeError( + "grad must be a Buffer, maybe you forget to call backward()?" 
+ ) + + if not param.requires_grad: + continue + + states = self._state[param] + step = states["step"] + step += 1.0 + grad = param.grad + if weight_decay != 0.0: + grad += param * weight_decay + + square_avg = states["square_avg"] + acc_delta = states["acc_delta"] + square_avg = rho * square_avg + (1 - rho) * grad ** 2 + std = sqrt(square_avg + eps) + delta = sqrt(acc_delta + eps) / std * grad + param -= lr * delta + acc_delta = rho * acc_delta + (1 - rho) * delta ** 2 + states["square_avg"]._reset(square_avg) + states["acc_delta"]._reset(acc_delta) + + assert len(self._grad_skip) == 0 diff --git a/imperative/python/megengine/optimizer/adagrad.py b/imperative/python/megengine/optimizer/adagrad.py new file mode 100644 index 0000000000000000000000000000000000000000..804c7abe7d0eed8fcd87ac6805ffbec4016947f5 --- /dev/null +++ b/imperative/python/megengine/optimizer/adagrad.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable, Union + +import numpy as np + +from ..functional import sqrt +from ..tensor_nn import Buffer, Parameter +from .distributed_optimizer import DistributedOptimizer + + +class Adagrad(DistributedOptimizer): + r"""Implements Adagrad algorithm. + + It has been proposed in `"Adaptive Subgradient Methods for Online Learning + and Stochastic Optimization" `_. + + :param params: iterable of parameters to optimize or dicts defining + parameter groups. + :param lr: coefficient that scale delta before it is applied + to the parameters (default: 1e-2). + :param lr_decay: learning rate decay (default: 0) + :param eps: term added to the denominator to improve + numerical stability (default: 1e-10). + :param weight_decay: weight decay (L2 penalty) (default: 0). + """ + + def __init__( + self, + params: Union[Iterable[Parameter], dict], + lr: float = 1e-2, + lr_decay: float = 0.0, + eps: float = 1e-10, + weight_decay: float = 0.0, + **kwargs + ): + assert lr >= 0.0, "Invalid learning rate: {}".format(lr) + assert lr_decay >= 0, "Invalid learning rate decay: {}".format(lr_decay) + assert eps >= 0.0, "Invalid epsilon value: {}".format(eps) + assert weight_decay >= 0.0, "Invalid weight_decay value: {}".format( + weight_decay + ) + + defaults = dict(lr=lr, lr_decay=lr_decay, eps=eps, weight_decay=weight_decay) + super().__init__(params, defaults, **kwargs) + + def _create_state(self, param_group): + for param in param_group["params"]: + self._add_state(param, "square_avg") + self._add_state(param, "step", initializer=0.0) + + def _updates(self, param_group): + lr = param_group["lr"] + lr_decay = param_group["lr_decay"] + weight_decay = param_group["weight_decay"] + eps = param_group["eps"] + + for param in param_group["params"]: + + if param.__wrapped__ in self._grad_skip: + self._grad_skip.remove(param.__wrapped__) + continue + + if not isinstance(param.grad, Buffer): + raise TypeError( + "grad must be a Buffer, maybe you forget to call backward()?" 
+ ) + + if not param.requires_grad: + continue + + states = self._state[param] + step = states["step"] + step += 1.0 + grad = param.grad + if weight_decay != 0.0: + grad += param * weight_decay + + square_avg = states["square_avg"] + square_avg += grad ** 2 + delta = grad / sqrt(square_avg + eps) + clr = lr / (1 + (step - 1) * lr_decay) + + param -= clr * delta + assert len(self._grad_skip) == 0 diff --git a/imperative/python/megengine/optimizer/adam.py b/imperative/python/megengine/optimizer/adam.py new file mode 100644 index 0000000000000000000000000000000000000000..fac9f4cb88fa56b0d89751c3d4700df3b4fcb649 --- /dev/null +++ b/imperative/python/megengine/optimizer/adam.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable, Tuple, Union + +from ..tensor_nn import Buffer, Parameter +from .distributed_optimizer import DistributedOptimizer + + +class Adam(DistributedOptimizer): + r"""Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" `_. + + :param params: iterable of parameters to optimize or dicts defining + parameter groups. + :param lr: learning rate. + :param betas: coefficients used for computing running averages of gradient + and its square. Default: (0.9, 0.999) + :param eps: term added to the denominator to improve numerical stability + Default: 1e-8 + :param weight_decay: weight decay (L2 penalty). Default: 0 + """ + + def __init__( + self, + params: Union[Iterable[Parameter], dict], + lr: float, + betas: Tuple[float, float] = (0.9, 0.999), + eps: float = 1e-8, + weight_decay: float = 0.0, + **kwargs + ): + if lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if weight_decay < 0.0: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + + defaults = dict(lr=lr, weight_decay=weight_decay, betas=betas, eps=eps) + super().__init__(params, defaults, **kwargs) + + def _create_state(self, param_group): + for param in param_group["params"]: + self._add_state(param, "exp_avg") + self._add_state(param, "exp_avg_sq") + self._add_state(param, "step", initializer=0.0) + + def _updates(self, param_group): + lr = param_group["lr"] + weight_decay = param_group["weight_decay"] + eps = param_group["eps"] + beta0, beta1 = param_group["betas"] + + for param in param_group["params"]: + + if param.__wrapped__ in self._grad_skip: + self._grad_skip.remove(param.__wrapped__) + continue + + if not param.requires_grad: + continue + + if not isinstance(param.grad, Buffer): + raise TypeError( + "grad must be a Buffer, maybe you forget to call backward()?" 
+ ) + + grad = param.grad + if weight_decay != 0.0: + grad += param * weight_decay + + states = self._state[param] + step = states["step"] + step += 1.0 + exp_avg = states["exp_avg"] + exp_avg_sq = states["exp_avg_sq"] + exp_avg = beta0 * exp_avg + grad * (1 - beta0) + exp_avg_sq = beta1 * exp_avg_sq + (1 - beta1) * (grad * grad) + + delta = (exp_avg / (1 - beta0 ** step)) / ( + (exp_avg_sq / (1 - beta1 ** step)) ** 0.5 + eps + ) + param -= lr * delta + + # not inplace change, need to update underlying tensor handler in state + states["exp_avg"]._reset(exp_avg) + states["exp_avg_sq"]._reset(exp_avg_sq) + + assert len(self._grad_skip) == 0 diff --git a/imperative/python/megengine/optimizer/distributed_optimizer.py b/imperative/python/megengine/optimizer/distributed_optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..86168c9ad844f9d378f277f1721b77521b021746 --- /dev/null +++ b/imperative/python/megengine/optimizer/distributed_optimizer.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable as Iter +from typing import Optional, Union + +from ..device import get_default_device +from ..distributed.group import get_client, is_distributed +from ..functional import add_update +from ..functional.distributed import WORLD, Group, all_reduce_sum, broadcast +from ..functional.utils import copy +from ..tensor import Tensor, TensorDict +from ..tensor_nn import Parameter +from .optimizer import Optimizer +from .param_pack import get_pack_list, pack_allreduce_split + + +class DistributedOptimizer(Optimizer): + r"""Add Distributed Func for distributed training. + + :param params: specifies what Tensors should be optimized. + :param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. + :param reduce_method: use all_reduce_sum or all_reduce_mean to reduce gradients + :param bcast_period: broadcasts params every *bcast_period* iterations. + if it equals to 0, it will broadcast params only at the beginning. Default: 500 + :param param_pack: whether to pack gradients to avoid small packages send/recv. Default: False + :param param_pack_thd: max size of packed gradients by bytes. 
Default: 10 * 1024 * 1024 + """ + + def __init__( + self, + params: Union[Iter[Parameter], dict], + defaults: dict, + reduce_method: Optional[str] = None, + dist_group: Optional[Group] = WORLD, + bcast_period: int = 0, + param_pack: bool = False, + param_pack_thd: int = 10 * 1024 * 1024, + ): + if is_distributed(): + assert reduce_method in ["sum", "mean"], "reduce_method must be specified" + defaults["orders"] = [] + defaults["dist_group"] = dist_group + super().__init__(params, defaults) + self._bcast_period = bcast_period + self._param_pack = param_pack + self._param_pack_thd = param_pack_thd + self._reduce_method = reduce_method + + self.add_save_load_state_ignore_keys( + {"grads", "orders", "pack_list", "shape_list", "dist_group"} + ) + + if is_distributed() and bcast_period != -1: + self.bcast_param() + + def grad_callback(self, grad, i, group): + if is_distributed() and group["dist_group"] is not None: + dist_group = group["dist_group"] + if self._param_pack and "pack_list" in group: + for pack, shapes in zip(group["pack_list"], group["shape_list"]): + if i == pack[-1]: + pack_allreduce_split(group, pack, shapes, self._reduce_method) + else: + group["orders"].append(i) + group["grads"][i] = all_reduce_sum( + grad, dist_group, dist_group.comp_node + ) + if self._reduce_method == "mean": + group["grads"][i] /= dist_group.size + + def _gen_pack_list(self, group): + if "pack_list" not in group: + dist_group = group["dist_group"] + if dist_group.rank == 0: + pack_list, shape_list = get_pack_list(group, self._param_pack_thd) + get_client().set_pack_list(dist_group.key, (pack_list, shape_list)) + else: + pack_list, shape_list = get_client().get_pack_list(dist_group.key) + group["pack_list"] = pack_list + group["shape_list"] = shape_list + + def backward(self, loss: Tensor): + ret = super().backward(loss) + if is_distributed(): + for group in self.param_groups: + if self._param_pack and group["dist_group"] is not None: + self._gen_pack_list(group) + return ret + + def step(self): + if is_distributed(): + for group in self.param_groups: + device = get_default_device() + for param in group["params"]: + if param.__wrapped__ not in self._grad_skip: + if param.grad.device != device: + param.grad = copy(param.grad, device) + if self._bcast_period > 0: + self._bcast_iter += 1 + if self._bcast_iter == self._bcast_period: + self.bcast_param() + self._bcast_iter = 0 + super().step() + + def bcast_param(self): + device = get_default_device() + for group in self.param_groups: + for param in group["params"]: + dist_group = group["dist_group"] + new_param = broadcast(param, dist_group) + if new_param.device != device: + new_param = copy(new_param, device) + add_update(param, new_param, alpha=0) + param._reset(new_param) diff --git a/imperative/python/megengine/optimizer/lr_scheduler.py b/imperative/python/megengine/optimizer/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..46d08d5dc0d4a7a125dc49b108ebca9af43b3018 --- /dev/null +++ b/imperative/python/megengine/optimizer/lr_scheduler.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
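
Subclasses such as `SGD` forward extra keyword arguments here through `**kwargs`, so distributed behaviour is configured at construction time: `grad_callback` all-reduces each gradient as it becomes available, and `reduce_method="mean"` divides by world size so the update matches single-process training. An illustrative wiring, with `net` as a hypothetical module replicated on each worker (under `is_distributed()` the assertion above makes `reduce_method` mandatory):

```python
import megengine.optimizer as optim

opt = optim.SGD(
    net.parameters(),
    lr=0.05,
    momentum=0.9,
    reduce_method="mean",  # average gradients across workers
    param_pack=True,       # concat small grads so fewer all_reduce calls are issued
)
```
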
+from abc import ABCMeta + +from .distributed_optimizer import DistributedOptimizer + + +class LRScheduler(metaclass=ABCMeta): + r"""Base class for all learning rate based schedulers. + + :param optimizer: Wrapped optimizer. + :param current_epoch: The index of current epoch. Default: -1 + """ + + def __init__( # pylint: disable=too-many-branches + self, optimizer: DistributedOptimizer, current_epoch: int = -1 + ): + if not isinstance(optimizer, DistributedOptimizer): + raise TypeError( + "optimizer argument given to the lr_scheduler should be Optimizer" + ) + self.optimizer = optimizer + self.current_epoch = current_epoch + if current_epoch == -1: + for group in self.optimizer.param_groups: + group.setdefault("initial_lr", group["lr"]) + else: + for i, group in enumerate(optimizer.param_groups): + if "initial_lr" not in group: + raise KeyError( + "param 'initial_lr' is not specified in " + "param_groups[{}] when resuming an optimizer".format(i) + ) + self.base_lrs = list( + map(lambda group: group["initial_lr"], self.optimizer.param_groups) + ) + + self.step() + + def state_dict(self): + r"""Returns the state of the scheduler as a :class:`dict`. + It contains an entry for every variable in self.__dict__ which + is not the optimizer. + """ + raise NotImplementedError + + def load_state_dict(self, state_dict): + r"""Loads the schedulers state. + + :param state_dict (dict): scheduler state. + """ + raise NotImplementedError + + def get_lr(self): + r""" Compute current learning rate for the scheduler. + """ + raise NotImplementedError + + def step(self, epoch=None): + if epoch is None: + self.current_epoch += 1 + else: + self.current_epoch = epoch + + values = self.get_lr() + for param_group, lr in zip(self.optimizer.param_groups, values): + param_group["lr"] = lr diff --git a/imperative/python/megengine/optimizer/multi_step_lr.py b/imperative/python/megengine/optimizer/multi_step_lr.py new file mode 100644 index 0000000000000000000000000000000000000000..45cc74c3dde18b5feecd775dfa401d38b2de13a4 --- /dev/null +++ b/imperative/python/megengine/optimizer/multi_step_lr.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from bisect import bisect_right +from typing import Iterable as Iter + +from .distributed_optimizer import DistributedOptimizer +from .lr_scheduler import LRScheduler + + +class MultiStepLR(LRScheduler): + r"""Decays the learning rate of each parameter group by gamma once the + number of epoch reaches one of the milestones. + + :param optimizer: Wrapped optimizer. + :param milestones (list): List of epoch indices. Must be increasing. + :param gamma (float): Multiplicative factor of learning rate decay. Default: 0.1. + :param current_epoch: The index of current epoch. Default: -1. + """ + + def __init__( + self, + optimizer: DistributedOptimizer, + milestones: Iter[int], + gamma: float = 0.1, + current_epoch: int = -1, + ): + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of increasing integers. 
Got {}".format( + milestones + ) + ) + + self.milestones = milestones + self.gamma = gamma + super().__init__(optimizer, current_epoch) + + def state_dict(self): + r"""Returns the state of the scheduler as a :class:`dict`. + It contains an entry for every variable in self.__dict__ which + is not the optimizer. + """ + return { + key: value + for key, value in self.__dict__.items() + if key in ["milestones", "gamma", "current_epoch"] + } + + def load_state_dict(self, state_dict): + r"""Loads the schedulers state. + + :param state_dict (dict): scheduler state. + """ + tmp_dict = {} + for key in ["milestones", "gamma", "current_epoch"]: + if not key in state_dict.keys(): + raise KeyError( + "key '{}'' is not specified in " + "state_dict when loading state dict".format(key) + ) + tmp_dict[key] = state_dict[key] + + self.__dict__.update(tmp_dict) + + def get_lr(self): + return [ + base_lr * self.gamma ** bisect_right(self.milestones, self.current_epoch) + for base_lr in self.base_lrs + ] diff --git a/imperative/python/megengine/optimizer/optimizer.py b/imperative/python/megengine/optimizer/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..f5bf18b0797f2cf739b7a41f0f2d33bf09635bf5 --- /dev/null +++ b/imperative/python/megengine/optimizer/optimizer.py @@ -0,0 +1,347 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from abc import ABCMeta, abstractmethod +from collections import Iterable +from contextlib import contextmanager +from typing import Dict +from typing import Iterable as Iter +from typing import Set, Union + +import numpy as np + +from ..core.autodiff.grad import Grad +from ..device import get_default_device +from ..distributed.group import get_client, is_distributed +from ..functional import add_update +from ..functional.distributed import all_reduce_sum, broadcast +from ..functional.utils import copy +from ..logger import get_logger +from ..tensor import Tensor, TensorDict +from ..tensor_nn import Buffer, Parameter + +logger = get_logger(__name__) + + +class _RequiredParameter: + def __repr__(self): + return "" + + +required = _RequiredParameter() + + +class Optimizer(metaclass=ABCMeta): + r"""Base class for all optimizers. + + :param params: specifies what Tensors should be optimized. + :param defaults: a dict of default parameters of Optimizer, like learning rate or momentum. 
+ """ + + _recording = None + _grad = None + _gradients = None + + def __init__( # pylint: disable=too-many-branches + self, params: Union[Iter[Parameter], dict], defaults: dict, + ): + self._state = TensorDict() + self._defaults = defaults + + if isinstance(params, (Parameter, dict)): + params = [params] + else: + if not isinstance(params, Iterable): + raise TypeError( + "params argument given to the optimizer should be " + "Parameter or dict, or Iterable of them" + ) + + self.param_groups = [] # type: list + self.save_load_state_ignore_keys = set() + + param_groups = list(params) + if len(param_groups) == 0: + raise ValueError("optimizer got an empty parameter list") + + param_type = type(param_groups[0]) + for param in param_groups: + if not isinstance(param, param_type): + raise TypeError( + "types of params argument given to the optimizer shoud be same" + ) + + if not isinstance(param_groups[0], dict): + param_groups = [{"params": param_groups}] + + for group in param_groups: + self.add_param_group(group) + + for group in self.param_groups: + self._create_state(group) + + def add_param_group(self, param_group: dict): + r"""Add a param group to ``param_groups`` of the :class:`~megengine.optim.optimizer.Optimizer`. + + This can be useful when fine tuning a pre-trained network as frozen layers can be made + trainable and added to the :class:`~megengine.optim.optimizer.Optimizer` as training progresses. + + :param param_group: specifies what tensors should be optimized along with group. + + """ + assert isinstance(param_group, dict), "param group must be a dict" + + if isinstance(param_group["params"], Parameter): + param_group["params"] = [param_group["params"]] + else: + param_group["params"] = list(param_group["params"]) + + for param in param_group["params"]: + if not isinstance(param, Parameter): + raise TypeError( + "optimizer can only optimize Parameters, but one of the params is " + + type(param) + ) + if not param.requires_grad: + raise ValueError( + "optimizer can only optimize Parameters with requires_grad=True" + ) + + for name, default in self._defaults.items(): + if default is required and name not in param_group: + raise ValueError( + "parameter group didn't specify a value of " + "required optimization parameter " + name + ) + param_group.setdefault(name, default) + + param_set = set() + + for group in self.param_groups: + param_set.update(set(map(id, group["params"]))) + + assert param_set.isdisjoint( + set(map(id, param_group["params"])) + ), "some parameters appear in more than one parameter group" + + self.param_groups.append(param_group) + + def _add_state(self, param, state_name, initializer=None): + if initializer is None: + initializer = np.zeros(param.shape, dtype=np.float32) + state_dict = self._state.setdefault(param, {}) + assert state_name not in state_dict + state = Buffer(initializer) + state_dict[state_name] = state + + @abstractmethod + def _create_state(self, param_group): + pass + + @abstractmethod + def _updates(self, param_group): + pass + + def _get_params(self): + params = [] + for group in self.param_groups: + for param in group["params"]: + params.append(param) + return params + + def grad_callback(self, grad, i, group): + pass + + def record(self): + @contextmanager + def recorder(): + params = self._get_params() + grad = Grad() + gradients = [None] * len(params) + if self._recording: + raise RuntimeError("already recording!") + try: + self._recording = True + self._grad = grad + for group in self.param_groups: + group["grads"] = [None] * 
len(group["params"]) + for i, param in enumerate(group["params"]): + + def callback(tensor, grad, i=i, group=group, self=self): + group["grads"][i] = grad + self.grad_callback(grad, i, group) + + grad.wrt(param, callback=callback) + with grad: + yield + finally: + self._recording = False + self._grad = None + for group in self.param_groups: + group["grads"] = [] + + return recorder() + + def _calculate_gradients(self, loss: Tensor): + if not self._recording: + raise RuntimeError( + "no computation history. " + "did you forget record() or " + "call a method that clears the history?" + ) + assert self._grad is not None + + if len(loss.__wrapped__._extra_data) == 0: # in case loss depends on no tensor + self._grad = None + return + + one = Tensor([1.0], dtype=loss.dtype, device=loss.device) + one = one.reshape(loss.shape) + try: + self._grad(loss, one) + finally: + self._grad = None + + def minimize(self, loss: Tensor): + self.backward(loss) + self.step() + + def backward(self, loss: Tensor): + """Computes the back-propagation of the network given loss. + + :param loss: The obtained loss tensor + """ + rst = [] + self._calculate_gradients(loss) + + # _grad_skip records the parameters which are not in the path of backward + self._grad_skip = set() + for group in self.param_groups: + # _grad_skip is consumed in optimizer.step() + # XXX: assumptions + # 1. Assume the same execution sequence for all GPUs in data parallel + # 2. If backward is called by multiple times to accumulate grad, + # it's also assumed same _grad_skip for all backward() calls + # Please change the code if any assumption is invalid + for param, grad in zip(group["params"], group["grads"]): + if grad is None: + self._grad_skip.add(param.__wrapped__) + continue + grad = Buffer(grad) + if getattr(param, "grad", None) is None: + param.grad = grad + else: + assert isinstance(param.grad, Buffer) + param.grad += grad + rst.append(param.grad) + if len(self._grad_skip) > 0: + get_logger(__name__).warning( + "{} parameters have no grad! " + "Make sure you pass the right parameters list".format( + len(self._grad_skip) + ) + ) + return rst + + def step(self): + r"""Performs a single optimization step. + + """ + for group in self.param_groups: + if isinstance(group["params"], set): + raise TypeError( + "optimized parameters need to be organized in ordered collections, " + "but the ordering of parameters in sets will change between runs. " + "Please use a list instead." + ) + self._updates(group) + + def zero_grad(self): + r"""Reset the grad to zeros. + + """ + for param_group in self.param_groups: + for param in param_group["params"]: + if getattr(param, "grad", None) is not None: + param.grad = None + + def add_save_load_state_ignore_keys(self, keys: Set[str]): + self.save_load_state_ignore_keys |= keys + + def state_dict(self) -> Dict: + r"""Export the optimizer state. + + :return: optimizer state. Can be loaded by :meth:`load_state_dict`. 
+ """ + param_groups = [] + state = dict() + param2id = TensorDict() + + cur_id = 0 + for group in self.param_groups: + for param in group["params"]: + if param not in param2id: + param2id[param] = cur_id + cur_id += 1 + + for param, st in self._state.items(): + state[param2id[param]] = st + + for group in self.param_groups: + param_group = { + k: v + for k, v in group.items() + if k != "params" and k not in self.save_load_state_ignore_keys + } + param_group["params"] = [param2id[param] for param in group["params"]] + param_groups.append(param_group) + + return {"param_groups": param_groups, "state": state} + + def load_state_dict(self, state: dict): + r"""Loads the optimizer state. + + :param state: optimizer state. Should be an object returned + from a call to :meth:`state_dict`. + """ + if len(self.param_groups) != len(state["param_groups"]): + raise ValueError( + "loaded state dict has a different number of parameter groups" + ) + parameter_map = dict() # type: Dict + for group_new, group_saved in zip(self.param_groups, state["param_groups"]): + if len(group_new["params"]) != len(group_saved["params"]): + raise ValueError( + "loaded state dict contains a parameter group that " + "doesn't match the size of optimizer's group" + ) + for param_new, param_saved in zip( + group_new["params"], group_saved["params"] + ): + p = param_new + self._state[p] = state["state"][param_saved].copy() + for k, v in self._state[p].items(): + if isinstance(v, Buffer): + self._state[p][k] = Buffer(v.numpy()) + + new_keys = set(group_new.keys()) - self.save_load_state_ignore_keys + saved_keys = set(group_saved.keys()) - self.save_load_state_ignore_keys + if new_keys != saved_keys: + raise ValueError( + "loaded state dict contains a parameter group that " + "doesn't match the keys of optimizer's group" + ) + for key in saved_keys: + if key != "params": + group_new[key] = group_saved[key] + + if len(self._state.keys()) != len(state["state"].keys()): + raise ValueError( + "loaded state dict contains a state that doesn't match " + "the size of optimizer's state" + ) diff --git a/imperative/python/megengine/optimizer/param_pack.py b/imperative/python/megengine/optimizer/param_pack.py new file mode 100644 index 0000000000000000000000000000000000000000..ea117aa84b1c80faaf9bbf9a30c508b4c27c4335 --- /dev/null +++ b/imperative/python/megengine/optimizer/param_pack.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+import numpy as np + +from ..functional import param_pack_concat, param_pack_split +from ..functional.distributed import all_reduce_sum +from ..tensor import Tensor + + +def get_offsets(shapes): + offsets = [] + offset = 0 + for shape in shapes: + offsets.append(offset) + offset += int(np.prod(shape)) + offsets.append(offset) + return offsets + + +def get_pack_list(param_group, param_pack_thd): + pack_list = dict() + shape_list = dict() + pack_sum = dict() + pack_ret, shape_ret = [], [] + ignore_first = 8 + ignore_last = 0 + orders_len = len(param_group["orders"]) + for i, idx in enumerate(param_group["orders"]): + param = param_group["params"][idx] + dtype = str(np.dtype(param.dtype)) + dtype_size = np.dtype(param.dtype).itemsize + shape = param.shape + if ignore_first > 0: + ignore_first -= 1 + pack_ret.append([idx]) + shape_ret.append([shape]) + continue + if dtype in pack_list.keys(): + pack_list[dtype].append(idx) + shape_list[dtype].append(shape) + pack_sum[dtype] += int(np.prod(shape)) + else: + pack_list[dtype] = [idx] + shape_list[dtype] = [shape] + pack_sum[dtype] = int(np.prod(shape)) + if ( + pack_sum[dtype] * dtype_size > param_pack_thd + or i + ignore_last > orders_len + ): + pack_ret.append(pack_list[dtype]) + shape_ret.append(shape_list[dtype]) + pack_list[dtype] = [] + shape_list[dtype] = [] + pack_sum[dtype] = 0 + for key in sorted(pack_list.keys()): + if len(pack_list[key]) > 0: + pack_ret.append(pack_list[key]) + shape_ret.append(shape_list[key]) + return pack_ret, shape_ret + + +def pack_allreduce_split(group, pack, shapes, reduce_method): + dist_group = group["dist_group"] + grads = [group["grads"][idx] for idx in pack] + offsets_val = get_offsets(shapes) + offsets = Tensor(offsets_val) + packed_grads = param_pack_concat(grads, offsets, offsets_val) + packed_grads = all_reduce_sum(packed_grads, dist_group, dist_group.comp_node) + if reduce_method == "mean": + packed_grads /= dist_group.size + grads = param_pack_split(packed_grads, offsets_val, shapes) + for i, grad in enumerate(grads): + group["grads"][pack[i]] = grad diff --git a/imperative/python/megengine/optimizer/sgd.py b/imperative/python/megengine/optimizer/sgd.py new file mode 100644 index 0000000000000000000000000000000000000000..4dfb485bb49b26db03733c7d864643b30aab7a03 --- /dev/null +++ b/imperative/python/megengine/optimizer/sgd.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable, Union + +from ..tensor_nn import Buffer, Parameter +from .distributed_optimizer import DistributedOptimizer + + +class SGD(DistributedOptimizer): + r"""Implements stochastic gradient descent. + + Nesterov momentum is based on the formula from + `"On the importance of initialization and momentum in deep learning" `_ . + + :param params: iterable of parameters to optimize or dicts defining + parameter groups. + :param lr: learning rate. + :param momentum: momentum factor. Default: 0.0 + :param weight_decay: weight decay (L2 penalty). 
Default: 0.0 + """ + + def __init__( + self, + params: Union[Iterable[Parameter], dict], + lr: float, + momentum: float = 0.0, + weight_decay: float = 0.0, + **kwargs + ): + assert lr >= 0.0, "Invalid learning rate: {}".format(lr) + assert momentum >= 0.0, "Invalid momentum value: {}".format(momentum) + assert weight_decay >= 0.0, "Invalid weight_decay value: {}".format( + weight_decay + ) + + defaults = dict(lr=lr, momentum=momentum, weight_decay=weight_decay) + super().__init__(params, defaults, **kwargs) + + def _create_state(self, param_group): + if param_group["momentum"] != 0.0: + for param in param_group["params"]: + self._add_state(param, "momentum_buffer") + + def _updates(self, param_group): + lr = param_group["lr"] + weight_decay = param_group["weight_decay"] + momentum = param_group["momentum"] + + for param in param_group["params"]: + + if param.__wrapped__ in self._grad_skip: + self._grad_skip.remove(param.__wrapped__) + continue + + if not isinstance(param.grad, Buffer): + raise TypeError( + "grad must be a Buffer, maybe you forget to call backward()?" + ) + + if not param.requires_grad: + continue + + grad = param.grad + if weight_decay != 0.0: + grad += param * weight_decay + + if momentum: + v = self._state[param]["momentum_buffer"] + v = momentum * v + grad + param -= lr * v + self._state[param]["momentum_buffer"]._reset(v) + else: + param -= lr * grad + + assert len(self._grad_skip) == 0 diff --git a/imperative/python/megengine/quantization/__init__.py b/imperative/python/megengine/quantization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c8a0e0da5f9f7c8609584653f68b1d3ab584c85 --- /dev/null +++ b/imperative/python/megengine/quantization/__init__.py @@ -0,0 +1,20 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +from .fake_quant import FakeQuantize +from .internal_fake_quant import * +from .observer import HistogramObserver, Observer +from .qconfig import ( + QConfig, + calibration_qconfig, + ema_fakequant_qconfig, + ema_lowbit_fakequant_qconfig, + min_max_fakequant_qconfig, + tqt_quant_qconfig, +) +from .utils import QuantMode diff --git a/imperative/python/megengine/quantization/fake_quant.py b/imperative/python/megengine/quantization/fake_quant.py new file mode 100644 index 0000000000000000000000000000000000000000..941445c310eefe9f5a650578db3a79a75c8cc1b9 --- /dev/null +++ b/imperative/python/megengine/quantization/fake_quant.py @@ -0,0 +1,154 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import math +from typing import Iterable + +import numpy as np + +from .. 
import functional as F
+from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
+from ..core.tensor.function import Function
+from ..module import Module
+from ..tensor import Tensor
+from ..tensor_nn import Parameter
+from .utils import QuantMode, fake_quant_tensor, get_qparam_dict
+
+
+class _FakeQuantize(Module):
+    r"""
+    A basic fake quantization module.
+
+    :param dtype: A string indicating the target quantization type of input.
+    :param narrow_range: Whether the absolute value of ``qmin`` is the same as ``qmax``,
+        instead of 1 greater. Usually True for weight and False for activation.
+    :param enable: Whether to do ``fake_quant_forward`` (enabled) or ``normal_forward`` (disabled).
+    """
+
+    def __init__(self, dtype: str, narrow_range: bool = False, enable: bool = True):
+        super().__init__()
+        if dtype not in _metadata_dict.keys():
+            raise ValueError(
+                "unknown dtype: {}, only support {}".format(
+                    dtype, _metadata_dict.keys()
+                )
+            )
+        self.dtype = dtype
+        self.narrow_range = narrow_range
+        self.qmin = (
+            -_metadata_dict[dtype].qmax if narrow_range else _metadata_dict[dtype].qmin
+        )
+        self.qmax = _metadata_dict[dtype].qmax
+        self.enabled = enable
+
+    def enable(self):
+        self.enabled = True
+
+    def disable(self):
+        self.enabled = False
+
+    def fake_quant_forward(self, inp, q_dict=None):
+        return inp
+
+    def normal_forward(self, inp, q_dict=None):
+        return inp
+
+    def forward(self, inp, q_dict=None):
+        if self.enabled:
+            return self.fake_quant_forward(inp, q_dict=q_dict)
+        else:
+            return self.normal_forward(inp, q_dict=q_dict)
+
+
+class TQT_Function(Function):
+    def __init__(self, lowerbound, upperbound):
+        super().__init__()
+        self.lowerbound = lowerbound
+        self.upperbound = upperbound
+        self.saved_tensors = ()
+
+    def save_for_backward(self, *tensors: Iterable[Tensor]):
+        """
+        Saves tensors needed for gradient computation. This method should be called only
+        once in :meth:`~.function.Function.forward`; additional calls will replace values saved previously.
+
+        The saved tensors can be accessed through the ``saved_tensors`` attribute.
+        """
+        self.saved_tensors = tensors
+
+    def forward(self, inp, scale):
+        t = 2 ** scale
+        # t = F.maximum(t, 1e-4)
+        inp_scaled = inp / t
+        inp_clipped = F.maximum(F.minimum(inp_scaled, self.upperbound), self.lowerbound)
+        inp_rounded = F.round(inp_clipped)
+        inp_flq = inp_rounded * t
+        self.save_for_backward(inp_scaled, inp_rounded, t)
+        return inp_flq
+
+    def backward(self, grad_inp_flq):
+        (inp_scaled, inp_rounded, t) = self.saved_tensors
+        mask_clip = F.logical_or(
+            inp_scaled < -0.5 + self.lowerbound, inp_scaled > self.upperbound + 0.5
+        )  # mask for accumulating the gradients of |data_scaled| > L
+        mask_quant = F.logical_not(mask_clip)
+        grad_quant = (
+            grad_inp_flq * mask_quant * (inp_rounded - inp_scaled)
+        )  # gradient within |data_scaled| <= L
+        grad_clip = (
+            grad_inp_flq * mask_clip * inp_rounded
+        )  # gradient with |data_scaled| > L
+        grad_s = grad_clip.sum() + grad_quant.sum()
+        # dL/ds = dL/dt * t * ln(2)
+        grad_s = grad_s * t * math.log(2)
+        grad_inp = grad_inp_flq * mask_quant
+        return grad_inp, grad_s
+
+
+class TQT(_FakeQuantize):
+    r"""
+    TQT: `Trained Quantization Thresholds for Accurate and Efficient Fixed-Point
+    Inference of Deep Neural Networks <https://arxiv.org/abs/1903.08066>`_.
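+
+    A minimal usage sketch (``inp`` is assumed to be an existing :class:`~.Tensor`;
+    the surrounding training loop is omitted):
+
+    .. code-block::
+
+        fq = TQT(dtype="qint8", narrow_range=True)
+        fq.enable()       # fake-quant forward; ``scale`` is finetuned via STE gradients
+        out = fq(inp)     # quantize-round-dequantize with step size ``2 ** scale``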
+ """ + + def __init__(self, dtype: str, narrow_range: bool = False, enable: bool = True): + super().__init__(dtype, narrow_range, enable) + self.scale = Parameter(0.0, dtype=np.float32) + + def fake_quant_forward(self, inp, q_dict=None): + # when enable, TQT will do fakequant forward, finetune the scale + return TQT_Function(self.qmin, self.qmax)(inp, self.scale) + + def normal_foward(self, inp, q_dict=None): + if q_dict["enable_observer"]: + # when disable, TQT will do normal forward, initialize scale weight + tmp_scale = F.maximum(F.abs(q_dict["min_val"]), F.abs(q_dict["max_val"])) + tmp_scale = F.log(tmp_scale / 127) / math.log(2) + F.add_update(self.scale, tmp_scale, alpha=0.0, beta=1.0, bias=0.0) + return inp + + def get_qparams(self): + q_dict = get_qparam_dict(QuantMode.TQT) + q_dict["scale"] = 2 ** self.scale + return q_dict + + def get_dtype(self): + q_dict = self.get_qparams() + scale = None if "scale" not in q_dict else q_dict["scale"].numpy()[0] + zero_point = ( + None if "zero_point" not in q_dict else q_dict["zero_point"].numpy()[0] + ) + return get_quantized_dtype(self.dtype, scale, zero_point) + + +class FakeQuantize(_FakeQuantize): + r""" + A module to do quant and dequant according to observer's scale and zero_point. + """ + + def fake_quant_forward(self, inp, q_dict=None): + return fake_quant_tensor(inp, self.qmin, self.qmax, q_dict) diff --git a/imperative/python/megengine/quantization/internal_fake_quant.py b/imperative/python/megengine/quantization/internal_fake_quant.py new file mode 100644 index 0000000000000000000000000000000000000000..02d1d89767eab1ba801c075a7cea3e53edcaed39 --- /dev/null +++ b/imperative/python/megengine/quantization/internal_fake_quant.py @@ -0,0 +1,19 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import copy +import math +from functools import partial + +import numpy as np + +from .. import functional as F +from ..core.tensor.function import Function +from .fake_quant import _FakeQuantize +from .observer import MinMaxObserver +from .qconfig import QConfig + diff --git a/imperative/python/megengine/quantization/observer.py b/imperative/python/megengine/quantization/observer.py new file mode 100644 index 0000000000000000000000000000000000000000..3aa610820e59bff872e91123c2823b7b3ab0bf24 --- /dev/null +++ b/imperative/python/megengine/quantization/observer.py @@ -0,0 +1,404 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import math +from abc import abstractmethod + +import numpy as np + +from .. import functional as F +from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype +from ..module import Module +from ..tensor_nn import Buffer +from .utils import QuantMode, Round, get_qparam_dict + + +class Observer(Module): + r""" + A base class for Observer Module. 
+
+    :param dtype: a string indicating which dtype the scale and zero_point are collected for.
+    :param narrow_range: Whether the absolute value of ``qmin`` is the same as ``qmax``,
+        instead of 1 greater. Usually True for weight and False for activation.
+    """
+
+    def __init__(self, dtype: str, narrow_range: bool = False):
+        super().__init__()
+        if dtype not in _metadata_dict.keys():
+            raise ValueError(
+                "unknown dtype: {}, only support {}".format(
+                    dtype, _metadata_dict.keys()
+                )
+            )
+        self.dtype = dtype
+        self.narrow_range = narrow_range
+        self.qmin = (
+            -_metadata_dict[dtype].qmax if narrow_range else _metadata_dict[dtype].qmin
+        )
+        self.qmax = _metadata_dict[dtype].qmax
+        self.enabled = True
+
+    def get_dtype(self):
+        q_dict = self.get_qparams()
+        numpy_scale = None if "scale" not in q_dict else q_dict["scale"].numpy()[0]
+        numpy_zero_point = (
+            None if "zero_point" not in q_dict else q_dict["zero_point"].numpy()[0]
+        )
+        return get_quantized_dtype(self.dtype, numpy_scale, numpy_zero_point)
+
+    def enable(self):
+        self.enabled = True
+
+    def disable(self):
+        self.enabled = False
+
+    def train(self, mode: bool = True, recursive: bool = True) -> None:
+        super().train(mode, recursive)
+        if mode:
+            self.enable()
+        else:
+            self.disable()
+
+    @abstractmethod
+    def forward(self, x):
+        pass
+
+    @abstractmethod
+    def get_qparams(self, **kwargs):
+        pass
+
+
+class MinMaxObserver(Observer):
+    def __init__(
+        self,
+        mode=QuantMode.SYMMERTIC,
+        eps=0.00001,
+        dtype="qint8",
+        narrow_range: bool = False,
+    ):
+        super().__init__(dtype, narrow_range)
+        self.mode = mode
+        self.min_val = Buffer(np.finfo(np.float32).max, dtype=np.float32)
+        self.max_val = Buffer(np.finfo(np.float32).min, dtype=np.float32)
+        self.scale_limit = eps
+
+    def _calculate_qparams(self, inp_min_val, inp_max_val):
+        min_val = F.minimum(0.0, inp_min_val)
+        max_val = F.maximum(0.0, inp_max_val)
+        q_dict = get_qparam_dict(self.mode)
+        q_dict["min_val"] = inp_min_val
+        q_dict["max_val"] = inp_max_val
+        q_dict["enable_observer"] = self.enabled
+        if self.mode == QuantMode.SYMMERTIC:
+            symmetric_max_vals = F.maximum(-min_val, max_val)
+            # use maximum to avoid the scale being too small at the beginning
+            q_dict["scale"] = F.maximum(
+                symmetric_max_vals / ((self.qmax - self.qmin) / 2), self.scale_limit
+            )
+            # zero_point = self.zero_point
+        else:
+            # use maximum to avoid the scale being too small at the beginning
+            q_dict["scale"] = F.maximum(
+                (max_val - min_val) / (self.qmax - self.qmin), self.scale_limit,
+            )
+            # calculate zero_point
+            q_dict["zero_point"] = self.qmin - Round()((min_val / q_dict["scale"]))
+
+        return q_dict
+
+    def get_qparams(self):
+        return self._calculate_qparams(self.min_val, self.max_val)
+
+    def forward(self, x_orig):
+        if self.enabled:
+            # stop gradient
+            x = x_orig.detach()
+            # find max and min
+            self.min_val = F.minimum(self.min_val, x.min())
+            self.max_val = F.maximum(self.max_val, x.max())
+        return x_orig
+
+
+class ExponentialMovingAverageObserver(MinMaxObserver):
+    def __init__(
+        self,
+        momentum=0.9,
+        mode=QuantMode.SYMMERTIC,
+        eps=0.00001,
+        dtype="qint8",
+        narrow_range: bool = False,
+    ):
+        super().__init__(mode, eps, dtype, narrow_range)
+        self.momentum = Buffer(momentum)
+        self.runtime_momentum = Buffer(0.0)
+
+    def set_momentum(self, momentum):
+        self.momentum.set_value(momentum)
+
+    def forward(self, x_orig):
+        if self.enabled:
+            # stop gradient
+            x = x_orig.detach()
+            # Exponential Moving Average
+            self.min_val = (
+                self.min_val * self.runtime_momentum
+                + (1 - self.runtime_momentum) * x.min()
+            )
+            self.max_val = (
self.max_val * self.runtime_momentum + + (1 - self.runtime_momentum) * x.max() + ) + self.runtime_momentum = self.momentum + + return x_orig + + +class HistogramObserver(MinMaxObserver): + def __init__( + self, + bins=2048, + upsample_rate=128, + mode=QuantMode.SYMMERTIC, + eps=0.00001, + dtype="qint8", + narrow_range: bool = False, + ): + super().__init__(mode, eps, dtype, narrow_range) + self.bins = bins + self.upsample_rate = upsample_rate + self.dst_nbins = _metadata_dict[dtype].qmax - _metadata_dict[dtype].qmin + 1 + self.histogram = Buffer([-1] + [0.0] * (bins - 1)) + + def _non_linear_param_search(self): + r"""Non-linear parameter search. + An approximation for L2 error minimization for selecting min/max. + By selecting new min/max, we filter out outliers in input distribution. + """ + + np_min_val = self.min_val.numpy()[0] + np_max_val = self.max_val.numpy()[0] + np_histogram = self.histogram.numpy() + assert len(np_histogram) == self.bins, "bins mistmatch" + bin_width = (np_max_val - np_min_val) / self.bins + + def _get_norm(delta_begin, delta_end, density, norm_type): + r""" + Compute the norm of the values uniformaly distributed between + delta_begin and delta_end. + norm = density * (integral_{begin, end} x^2) + = density * (end^3 - begin^3) / 3 + """ + assert norm_type == "L2", "Only L2 norms are currently supported" + norm = 0.0 + if norm_type == "L2": + norm = ( + delta_end * delta_end * delta_end + - delta_begin * delta_begin * delta_begin + ) / 3 + return density * norm + + def _compute_quantization_error(next_start_bin, next_end_bin, norm_type): + r""" + Compute the quantization error if we use start_bin to end_bin as the + min and max to do the quantization. + """ + + norm = 0.0 + dst_bin_width = ( + bin_width * (next_end_bin - next_start_bin + 1) / self.dst_nbins + ) + if dst_bin_width == 0.0: + return 0.0 + for src_bin in range(self.bins): + # distances from the beginning of first dst_bin to the beginning and + # end of src_bin + src_bin_begin = (src_bin - next_start_bin) * bin_width + src_bin_end = src_bin_begin + bin_width + + # which dst_bins the beginning and end of src_bin belong to? 
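+                # (a src_bin may lie entirely inside one dst_bin or straddle several;
+                # the branches below split its L2 contribution accordingly)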
+ dst_bin_of_begin = min( + self.dst_nbins - 1, + max(0.0, math.floor(src_bin_begin / dst_bin_width)), + ) + dst_bin_of_end = min( + self.dst_nbins - 1, + max(0.0, math.floor(src_bin_end / dst_bin_width)), + ) + dst_bin_of_begin_center = ( + dst_bin_of_begin * dst_bin_width + dst_bin_width / 2 + ) + + density = np_histogram[src_bin] / bin_width + if dst_bin_of_begin == dst_bin_of_end: + # if src_bin is entirely within 1 dst_bin + delta_begin = src_bin_begin - dst_bin_of_begin_center + delta_end = src_bin_end - dst_bin_of_begin_center + norm = norm + _get_norm(delta_begin, delta_end, density, norm_type) + else: + delta_begin = src_bin_begin - dst_bin_of_begin_center + delta_end = dst_bin_width / 2 + norm = norm + _get_norm(delta_begin, delta_end, density, norm_type) + + norm = norm + (dst_bin_of_end - dst_bin_of_begin - 1) * _get_norm( + -dst_bin_width / 2, dst_bin_width / 2, density, norm_type + ) + + dst_bin_of_end_center = ( + dst_bin_of_end * dst_bin_width + dst_bin_width / 2 + ) + + delta_begin = -dst_bin_width / 2 + delta_end = src_bin_end - dst_bin_of_end_center + norm = norm + _get_norm(delta_begin, delta_end, density, norm_type) + return norm + + # cumulative sum + total = sum(np_histogram) + cSum = np.cumsum(np_histogram, axis=0) + + stepsize = 1e-5 # granularity + alpha = 0.0 # lower bound + beta = 1.0 # upper bound + start_bin = 0 + end_bin = self.bins - 1 + norm_min = float("inf") + + while alpha < beta: + # Find the next step + next_alpha = alpha + stepsize + next_beta = beta - stepsize + + # find the left and right bins between the quantile bounds + l = start_bin + r = end_bin + while l < end_bin and cSum[l] < next_alpha * total: + l = l + 1 + while r > start_bin and cSum[r] > next_beta * total: + r = r - 1 + + # decide the next move + next_start_bin = start_bin + next_end_bin = end_bin + if (l - start_bin) > (end_bin - r): + # move the start bin + next_start_bin = l + alpha = next_alpha + else: + # move the end bin + next_end_bin = r + beta = next_beta + + if next_start_bin == start_bin and next_end_bin == end_bin: + continue + + # calculate the quantization error using next_start_bin and next_end_bin + norm = _compute_quantization_error(next_start_bin, next_end_bin, "L2") + + if norm > norm_min: + break + norm_min = norm + start_bin = next_start_bin + end_bin = next_end_bin + + new_min = self.min_val + bin_width * start_bin + new_max = self.min_val + bin_width * (end_bin + 1) + return new_min, new_max + + def get_qparams(self): + new_min, new_max = self._non_linear_param_search() + return self._calculate_qparams(new_min, new_max) + + def _combine_histograms( + self, orig_hist, new_hist, upsample_rate, downsample_rate, start_idx, Nbins + ): + # First up-sample the histogram with new data by a factor of L + # This creates an approximate probability density thats piecwise constant + upsampled_histogram = new_hist.repeat(upsample_rate) + # Now insert the upsampled histogram into the output + # histogram, which is initialized with zeros. 
+ # The offset at which the histogram is introduced is determined + # by the start index as the output histogram can cover a wider range + histogram_with_output_range = np.zeros((Nbins * downsample_rate)) + histogram_with_output_range[ + start_idx : Nbins * upsample_rate + start_idx + ] = upsampled_histogram + # Compute integral histogram, double precision is needed to ensure + # that there are no overflows + integral_histogram = np.cumsum(histogram_with_output_range, 0)[ + downsample_rate - 1 :: downsample_rate + ] + # Finally perform interpolation + shifted_integral_histogram = np.zeros((Nbins)) + shifted_integral_histogram[1:Nbins] = integral_histogram[0:-1] + interpolated_histogram = ( + integral_histogram - shifted_integral_histogram + ) / upsample_rate + orig_hist = orig_hist + interpolated_histogram + return orig_hist + + def _adjust_min_max(self, combined_min, combined_max, upsample_rate): + # We ensure that: + # (combined_max - combined_min)/(downsample_rate*Nbins) = (max - min)/(upsample_rate*Nbins) + # This allows us to have a common grid of resolution s, where we can align + # the input histogram + # start_idx maps min_val to the histogram bin index. + np_min_val = self.min_val.numpy()[0] + np_max_val = self.max_val.numpy()[0] + + hist_bin_width = (np_max_val - np_min_val) / (self.bins * upsample_rate) + downsample_rate = int( + np.ceil((combined_max - combined_min) / (self.bins * hist_bin_width)) + ) + e = downsample_rate * (self.bins * hist_bin_width) - ( + combined_max - combined_min + ) + combined_max = combined_max + e / 2 + combined_min = combined_min - e / 2 + start_idx = int(np.round((np_min_val - combined_min) / hist_bin_width)) + + return combined_min, combined_max, downsample_rate, start_idx + + def sideeffect_forward(self, x_orig): + x = x_orig.numpy() + min_val = self.min_val.numpy()[0] + max_val = self.max_val.numpy()[0] + histogram = self.histogram.numpy() + new_min = x.min() + new_max = x.max() + if histogram[0] == -1: + new_histogram, _ = np.histogram(x, self.bins, (new_min, new_max)) + else: + new_min = min(new_min, min_val) + new_max = max(new_max, max_val) + # combine the existing histogram and new histogram into 1 histogram + # We do this by first upsampling the histogram to a dense grid + # and then downsampling the histogram efficiently + (new_min, new_max, downsample_rate, start_idx,) = self._adjust_min_max( + new_min, new_max, self.upsample_rate + ) + + new_histogram, _ = np.histogram(x, self.bins, (new_min, new_max)) + new_histogram = new_histogram.astype(np.float64) + if new_min == min_val and new_max == max_val: + new_histogram += histogram + else: + new_histogram = self._combine_histograms( + new_histogram, + histogram, + self.upsample_rate, + downsample_rate, + start_idx, + self.bins, + ) + + self.histogram.set_value(new_histogram) + self.min_val.set_value(new_min) + self.max_val.set_value(new_max) + + def forward(self, x_orig): + self.sideeffect_forward(x_orig) + return x_orig diff --git a/imperative/python/megengine/quantization/qconfig.py b/imperative/python/megengine/quantization/qconfig.py new file mode 100644 index 0000000000000000000000000000000000000000..6606c1a513be2cf3d1a766a7c044f550b6c8480d --- /dev/null +++ b/imperative/python/megengine/quantization/qconfig.py @@ -0,0 +1,109 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from functools import partial
+
+from ..module import Module
+from .fake_quant import TQT, FakeQuantize
+from .observer import (
+    ExponentialMovingAverageObserver,
+    HistogramObserver,
+    MinMaxObserver,
+)
+
+
+class QConfig:
+    r"""
+    A config class indicating how to quantize a :class:`~.QATModule`'s
+    ``activation`` and ``weight``. See :meth:`~.QATModule.set_qconfig` for detailed usage.
+
+    :param weight_observer: interface to instantiate an :class:`~.Observer` indicating
+        how to collect scales and zero_point of weight.
+    :param act_observer: similar to ``weight_observer`` but for activation.
+    :param weight_fake_quant: interface to instantiate a :class:`~.FakeQuantize` indicating
+        how to do fake_quant calculation.
+    :param act_fake_quant: similar to ``weight_fake_quant`` but for activation.
+
+    Examples:
+
+    .. code-block::
+
+        # Default EMA QConfig for QAT.
+        ema_fakequant_qconfig = QConfig(
+            weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
+            act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8", narrow_range=False),
+            weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
+            act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
+        )
+
+    Each parameter is a ``class`` rather than an instance, and we recommend using
+    ``functools.partial`` to bind initialization parameters of the ``class``, so that you
+    don't need to provide them in :meth:`~.QATModule.set_qconfig`.
+
+    Usually we set ``narrow_range`` of weight-related parameters to ``True`` and of
+    activation-related parameters to ``False``. Consider a multiply-accumulate such as
+    ``a * b + c * d``: if all four variables are -128 of dtype ``qint8``, the result is
+    ``2^15`` and causes overflow. Weights commonly enter such computations, so their
+    range needs to be narrowed.
+    """
+
+    def __init__(
+        self, weight_observer, act_observer, weight_fake_quant, act_fake_quant
+    ):
+        if isinstance(act_observer, Module) or isinstance(weight_observer, Module):
+            raise ValueError(
+                "QConfig must not receive observer instance, please pass observer"
+                " class generator using `partial(Observer, ...)` instead.
Use" + " partial(MyObserver, x=1) to override arguments to constructor if needed" + ) + self.weight_observer = weight_observer + self.act_observer = act_observer + self.weight_fake_quant = weight_fake_quant + self.act_fake_quant = act_fake_quant + + +tqt_quant_qconfig = QConfig( + weight_observer=partial( + ExponentialMovingAverageObserver, dtype="qint8", narrow_range=True + ), + act_observer=partial( + ExponentialMovingAverageObserver, dtype="qint8", narrow_range=False + ), + weight_fake_quant=partial(TQT, dtype="qint8", narrow_range=True), + act_fake_quant=partial(TQT, dtype="qint8", narrow_range=False), +) + +min_max_fakequant_qconfig = QConfig( + weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True), + act_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=False), + weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True), + act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False), +) + +ema_fakequant_qconfig = QConfig( + weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True), + act_observer=partial( + ExponentialMovingAverageObserver, dtype="qint8", narrow_range=False + ), + weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True), + act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False), +) + +ema_lowbit_fakequant_qconfig = QConfig( + weight_observer=partial(MinMaxObserver, dtype="qint4", narrow_range=False), + act_observer=partial( + ExponentialMovingAverageObserver, dtype="qint4", narrow_range=False + ), + weight_fake_quant=partial(FakeQuantize, dtype="qint4", narrow_range=False), + act_fake_quant=partial(FakeQuantize, dtype="qint4", narrow_range=False), +) + +calibration_qconfig = QConfig( + weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True), + act_observer=partial(HistogramObserver, dtype="qint8", narrow_range=False), + weight_fake_quant=None, + act_fake_quant=None, +) diff --git a/imperative/python/megengine/quantization/quantize.py b/imperative/python/megengine/quantization/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..5dab2ae4e47e6d01dd557ead8b6ea95a73c90898 --- /dev/null +++ b/imperative/python/megengine/quantization/quantize.py @@ -0,0 +1,191 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from copy import copy, deepcopy +from typing import Callable, Dict, Tuple + +from .. 
import module as Float
+from ..module import Module
+from ..module import qat as QAT
+from ..module import quantized as Quantized
+from ..module.qat import QATModule
+from ..module.quantized import QuantizedModule
+from .fake_quant import TQT
+from .qconfig import QConfig, ema_fakequant_qconfig
+
+
+def _get_quantable_module_names():
+    def is_quantable(key: str):
+        value = getattr(Quantized, key)
+        return (
+            isinstance(value, type)
+            and issubclass(value, QuantizedModule)
+            and value != QuantizedModule
+        )
+
+    # source should have all quantable modules' names
+    quantable_module_names = [key for key in dir(Quantized) if is_quantable(key)]
+    return quantable_module_names
+
+
+def _get_convert_dict() -> Tuple[
+    Dict[Module, QATModule], Dict[QATModule, QuantizedModule]
+]:
+    quantable_module_names = _get_quantable_module_names()
+
+    quantable_modules = [getattr(Float, key) for key in quantable_module_names]
+    qat_modules = [getattr(QAT, key) for key in quantable_module_names]
+    quantized_modules = [getattr(Quantized, key) for key in quantable_module_names]
+
+    float2qat_dict = dict(zip(quantable_modules, qat_modules))
+    qat2quantized_dict = dict(zip(qat_modules, quantized_modules))
+    return float2qat_dict, qat2quantized_dict
+
+
+_float2qat_dict, _qat2quantized_dict = _get_convert_dict()
+
+
+def quantize(module: Module, inplace: bool = True, mapping: dict = None):
+    r"""
+    Recursively convert :class:`~.QATModule` to :class:`~.QuantizedModule`
+    through :meth:`~.Module.apply`.
+
+    :param module: root module to do convert recursively.
+    :param inplace: whether to convert submodules in-place.
+    :param mapping: a dict indicating how to convert custom modules from QATModule to
+        QuantizedModule. Will be combined with the internal default convert mapping dict.
+    """
+
+    if not inplace:
+        module = deepcopy(module)
+
+    convert_dict = copy(_qat2quantized_dict)
+    if mapping is not None:
+        convert_dict.update(mapping)
+    qat_modules = tuple(convert_dict.keys())
+
+    def is_qat(mod: Module):
+        return isinstance(mod, qat_modules)
+
+    # must use list to avoid replacement influencing successor modules
+    for key, submodule, parent in list(
+        module._flatten(with_key=True, with_parent=True, predicate=is_qat)
+    ):
+        new_mod = convert_dict[type(submodule)].from_qat_module(submodule)
+        if isinstance(parent, Float.Sequential):
+            # cannot use setattr; must stay compatible with Sequential's ``__setitem__``
+            parent[int(key.split(".")[-1])] = new_mod
+        else:
+            setattr(parent, key.split(".")[-1], new_mod)
+
+    return module
+
+
+def quantize_qat(
+    module: Module,
+    inplace: bool = True,
+    qconfig: QConfig = ema_fakequant_qconfig,
+    mapping: dict = None,
+):
+    r"""
+    Recursively convert a float :class:`~.Module` to a :class:`~.QATModule`
+    through :meth:`~.Module.apply` and set its qconfig accordingly.
+
+    :param module: root module to do convert recursively.
+    :param inplace: whether to convert submodules in-place.
+    :param qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig.
+        Default: ``ema_fakequant_qconfig``.
+    :param mapping: a dict indicating how to convert custom modules from Module to QATModule.
+        Will be combined with the internal default convert mapping dict.
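+
+    A typical workflow sketch (``build_float_model`` is a hypothetical helper that
+    returns an ordinary float :class:`~.Module`):
+
+    .. code-block::
+
+        from megengine.quantization import ema_fakequant_qconfig
+        from megengine.quantization.quantize import quantize, quantize_qat
+
+        model = build_float_model()
+        quantize_qat(model, qconfig=ema_fakequant_qconfig)
+        # ... run quantization-aware finetuning ...
+        quantize(model)  # convert QATModules to QuantizedModules for deployment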
+ """ + + if not inplace: + module = deepcopy(module) + + convert_dict = copy(_float2qat_dict) + if mapping is not None: + convert_dict.update(mapping) + quantable_modules = tuple(convert_dict.keys()) + + def is_quantable(mod: Module): + return isinstance(mod, quantable_modules) + + # must use list to avoid replacement influencing successor modules + for key, submodule, parent in list( + module._flatten(with_key=True, with_parent=True, predicate=is_quantable) + ): + # only convert top quantable module. + if is_quantable(parent) or submodule.quantize_disabled: + continue + + new_mod = convert_dict[type(submodule)].from_float_module(submodule) + if isinstance(parent, Float.Sequential): + # cannnot use setattr to be compatible with Sequential's ``__setitem__`` + parent[int(key.split(".")[-1])] = new_mod + else: + setattr(parent, key.split(".")[-1], new_mod) + + propagate_qconfig(module, qconfig) + return module + + +def _propagate(module: Module, func_str: str, *args, **kargs): + def fn(mod: Module): + if isinstance(mod, QATModule): + getattr(mod, func_str)(*args, **kargs) + + module.apply(fn) + + +def propagate_qconfig(module: QATModule, qconfig: QConfig): + r""" + Recursively set ``module``'s qconfig through :meth:`~.Module.apply`. + + :param module: root module to traverse recursively. + :param qconfig: a instance of :class:`~.QConfig` to be set as submodules' qconfig. + """ + _propagate(module, "set_qconfig", qconfig) + + +def disable_fake_quant(module: Module): + r""" + Recursively disable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` + + :param module: root module to do disable fake quantization recursively. + """ + + _propagate(module, "set_fake_quant", False) + + +def disable_observer(module: Module): + r""" + Recursively disable ``module`` observer in QATModule through :meth:`~.Module.apply` + + :param module: root module to do disable observer recursively. + """ + + _propagate(module, "set_observer", False) + + +def enable_fake_quant(module: Module): + r""" + Recursively enable ``module`` fake quantization in QATModule through :meth:`~.Module.apply` + + :param module: root module to do enable fake quantization recursively. + """ + + _propagate(module, "set_fake_quant", True) + + +def enable_observer(module: Module): + r""" + Recursively enable ``module`` observer in QATModule through :meth:`~.Module.apply` + + :param module: root module to do enable observer recursively. + """ + + _propagate(module, "set_observer", True) diff --git a/imperative/python/megengine/quantization/utils.py b/imperative/python/megengine/quantization/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..810bbbb3502c8ff44a1b75d4162bb5659c4b3998 --- /dev/null +++ b/imperative/python/megengine/quantization/utils.py @@ -0,0 +1,116 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from enum import Enum +from functools import partial, update_wrapper, wraps +from typing import Dict + +from .. import functional as F +from ..core.tensor.dtype import _metadata_dict +from ..core.tensor.function import Function +from ..tensor import Tensor + + +class Round(Function): + """ + The functional round have no grad and can not use for quantization-aware-training. 
We use Function and STE (Straight-Through Estimator) to implement backward propagation.
+    """
+
+    def forward(self, x):
+        return F.round(x)
+
+    def backward(self, output_grads):
+        # STE: pass gradients through unchanged
+        return output_grads
+
+
+def register_method_to_class(cls):
+    # attach ``func`` to ``cls`` as a method while returning the original function
+    def decorator(func):
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            return func(self, *args, **kwargs)
+
+        if isinstance(func, partial):
+            update_wrapper(func, func.func)
+        setattr(cls, func.__name__, wrapper)
+        return func
+
+    return decorator
+
+
+class QuantMode(Enum):
+    """Quantization mode enumeration class.
+    """
+
+    SYMMERTIC = 1
+    ASYMMERTIC = 2
+    TQT = 3
+
+
+qparam_dict = {
+    QuantMode.SYMMERTIC: {"mode": QuantMode.SYMMERTIC, "scale": None,},
+    QuantMode.ASYMMERTIC: {
+        "mode": QuantMode.ASYMMERTIC,
+        "scale": None,
+        "zero_point": None,
+    },
+    QuantMode.TQT: {"mode": QuantMode.TQT, "scale": None,},
+}
+
+
+def get_qparam_dict(mode: QuantMode):
+    """Return the quantization parameter dictionary according to the mode.
+    """
+    return qparam_dict.get(mode, None)
+
+
+def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor:
+    """Apply fake quantization to the input tensor.
+
+    :param inp: the input tensor to be fake-quantized.
+    :param qmin: the lower bound of the quantized integer range.
+    :param qmax: the upper bound of the quantized integer range.
+    :param q_dict: the quantization parameter dict.
+
+    """
+    scale = q_dict["scale"]
+    zero_point = 0
+    if q_dict["mode"] == QuantMode.ASYMMERTIC:
+        zero_point = q_dict["zero_point"]
+    # Quant
+    oup = Round()(inp / scale) + zero_point
+    # Clip
+    oup = F.minimum(F.maximum(oup, qmin), qmax)
+    # Dequant
+    oup = (oup - zero_point) * scale
+    return oup
+
+
+def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor:
+    """Apply fake quantization to bias, using a scale derived from the input and
+    weight tensors; the quantized dtype is fixed to qint32.
+
+    :param bias: the bias tensor to be fake-quantized.
+    :param inp: the input tensor which contains the quantization parameters.
+    :param w_qat: the weight tensor which contains the quantization parameters.
+
+    .. warning::
+        Only works with the symmetric quantization method for now.
+
+    """
+    b_qat = bias
+    if hasattr(inp, "q_dict") and b_qat is not None:
+        if inp.q_dict["scale"] is not None and w_qat.q_dict["scale"] is not None:
+            # use the same mode as the weight.
+            b_dict = get_qparam_dict(w_qat.q_dict["mode"])
+            b_dict["scale"] = inp.q_dict["scale"] * w_qat.q_dict["scale"]
+            # TODO: add zero_point for ASYMMERTIC mode.
+            qmax = _metadata_dict["qint32"].qmax
+            qmin = _metadata_dict["qint32"].qmin
+            b_qat = fake_quant_tensor(b_qat, qmin, qmax, b_dict)
+
+    return b_qat
diff --git a/imperative/python/megengine/random/__init__.py b/imperative/python/megengine/random/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..86c8d7979c229c7c9697098a5a70135743f15748
--- /dev/null
+++ b/imperative/python/megengine/random/__init__.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from .distribution import gaussian, uniform +from .rng import manual_seed + +# pylint: disable=undefined-variable +del distribution, rng # type: ignore[name-defined] diff --git a/imperative/python/megengine/random/distribution.py b/imperative/python/megengine/random/distribution.py new file mode 100644 index 0000000000000000000000000000000000000000..2f4655af75a18cf7551ea4a07e14f6ed73829d47 --- /dev/null +++ b/imperative/python/megengine/random/distribution.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Iterable, Optional + +from .. import Tensor +from ..core._imperative_rt import invoke_op +from ..core.ops.builtin import GaussianRNG, UniformRNG +from ..core.tensor import utils +from ..core.tensor.core import apply +from .rng import _random_seed_generator + +__all__ = ["gaussian", "uniform"] + + +def gaussian(shape: Iterable[int], mean: float = 0, std: float = 1,) -> Tensor: + r"""Random variable with Gaussian distribution $N(\mu, \sigma)$ + + :param shape: Output tensor shape + :param mean: The mean or expectation of the distribution + :param std: The standard deviation of the distribution (variance = $\sigma ^ 2$) + :return: The output tensor + + Examples: + + .. testcode:: + + import megengine as mge + import megengine.random as rand + + x = rand.gaussian((2, 2), mean=0, std=1) + print(x.numpy()) + + .. testoutput:: + :options: +SKIP + + [[-0.20235455 -0.6959438 ] + [-1.4939808 -1.5824696 ]] + + """ + seed = _random_seed_generator().__next__() + op = GaussianRNG(seed=seed, mean=mean, std=std) + shape = Tensor(shape, dtype="int32") + (output,) = apply(op, shape) + return output + + +def uniform(shape: Iterable[int], low: float = 0, high: float = 1,) -> Tensor: + r"""Random variable with uniform distribution $U(0, 1)$ + + :param shape: Output tensor shape + :param low: Lower range + :param high: Upper range + :return: The output tensor + + Examples: + + .. testcode:: + + import megengine as mge + import megengine.random as rand + + x = rand.uniform((2, 2)) + print(x.numpy()) + + .. testoutput:: + :options: +SKIP + + [[0.76901674 0.70496535] + [0.09365904 0.62957656]] + + """ + assert low < high, "Uniform is not defined when low >= high" + + seed = _random_seed_generator().__next__() + op = UniformRNG(seed=seed) + shape = Tensor(shape, dtype="int32") + (output,) = apply(op, shape) + + return low + (high - low) * output diff --git a/imperative/python/megengine/random/rng.py b/imperative/python/megengine/random/rng.py new file mode 100644 index 0000000000000000000000000000000000000000..992c6a49565b1349ae5b171156b680cf1e644a4b --- /dev/null +++ b/imperative/python/megengine/random/rng.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+import time + +from numpy.random import MT19937 + +_rng = None + + +def _random_seed_generator(): + if _rng is None: + from ..distributed.group import get_rank + + manual_seed(seed=int(time.time()) + get_rank()) + while True: + yield _rng.random_raw() + + +def manual_seed(seed: int): + global _rng # pylint: disable=global-statement + _rng = MT19937(seed=seed) diff --git a/imperative/python/megengine/serialization.py b/imperative/python/megengine/serialization.py new file mode 100644 index 0000000000000000000000000000000000000000..300d92b5d0c8e9f6dd91c2b11589cc4f254e2b5b --- /dev/null +++ b/imperative/python/megengine/serialization.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import pickle + +from .device import _valid_device, get_default_device +from .tensor import Tensor +from .utils.max_recursion_limit import max_recursion_limit + + +def save(obj, f, pickle_module=pickle, pickle_protocol=pickle.HIGHEST_PROTOCOL): + r"""Save an object to disk file. + + :type obj: object + :param obj: object to save. Only ``module`` or ``state_dict`` are allowed. + :type f: text file object + :param f: a string of file name or a text file object to which ``obj`` is saved to. + :type pickle_module: + :param pickle_module: Default: ``pickle``. + :type pickle_protocol: + :param pickle_protocol: Default: ``pickle.HIGHEST_PROTOCOL``. + + """ + if isinstance(f, str): + with open(f, "wb") as fout: + save( + obj, fout, pickle_module=pickle_module, pickle_protocol=pickle_protocol + ) + return + + with max_recursion_limit(): + assert hasattr(f, "write"), "{} does not support write".format(f) + pickle_module.dump(obj, f, pickle_protocol) + + +class dmap: + def __init__(self, map_location): + self.map_location = map_location + + def __enter__(self): + Tensor.dmap_callback = staticmethod(self.map_location) + return self + + def __exit__(self, type, value, traceback): + Tensor.dmap_callback = None + + +def _get_callable_map_location(map_location): + if map_location is None: + + def callable_map_location(state): + return str(get_default_device()) + + elif isinstance(map_location, str): + + def callable_map_location(state): + return map_location + + elif isinstance(map_location, dict): + for key, value in map_location.items(): + # dict key and values can only be "xpux", "cpux", "gpu0", etc. + assert _valid_device(key), "Invalid locator_map key value {}".format(key) + assert _valid_device(value), "Invalid locator_map key value {}".format( + value + ) + + def callable_map_location(state): + if state[:4] in map_location.keys(): + state = map_location[state[:4]] + return state + + else: + assert callable(map_location), "map_location should be str, dict or function" + callable_map_location = map_location + return callable_map_location + + +def load(f, map_location=None, pickle_module=pickle): + r"""Load an object saved with save() from a file. + + :type f: text file object + :param f: a string of file name or a text file object from which to load. + :type map_location: str, dict or a function specifying the map rules + :param map_location: Default: ``None``. + + .. note:: + + map_location defines device mapping. See examples for usage. 
+
+    :type pickle_module:
+    :param pickle_module: Default: ``pickle``.
+
+    .. note::
+
+        If you will call :func:`mge.set_default_device()`, please do it
+        before :func:`mge.load()`.
+
+    Examples:
+
+    .. testcode::
+
+        import megengine as mge
+        # Load tensors to the same device as defined in model.mge
+        mge.load('model.mge')
+        # Load all tensors to gpu0.
+        mge.load('model.mge', map_location='gpu0')
+        # Load all tensors originally on gpu0 to cpu0
+        mge.load('model.mge', map_location={'gpu0':'cpu0'})
+        # Load all tensors to cpu0
+        mge.load('model.mge', map_location=lambda dev: 'cpu0')
+
+    """
+    if isinstance(f, str):
+        with open(f, "rb") as fin:
+            return load(fin, map_location=map_location, pickle_module=pickle_module)
+
+    map_location = _get_callable_map_location(map_location)  # callable map_location
+
+    with dmap(map_location):
+        return pickle_module.load(f)
diff --git a/imperative/python/megengine/tensor.py b/imperative/python/megengine/tensor.py
new file mode 100644
index 0000000000000000000000000000000000000000..1848463c39605ee460ccdad7f555e178735a7648
--- /dev/null
+++ b/imperative/python/megengine/tensor.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
+
+import collections.abc
+
+from .core import Tensor as _Tensor
+from .device import get_default_device
+
+
+class Tensor(_Tensor):
+    requires_grad = False
+    dmap_callback = None
+
+    def __init__(self, data, dtype=None, device=None):
+        if device is None:
+            device = get_default_device()
+        self.q_dict = {"mode": None, "scale": None, "zero_point": None}
+        super().__init__(data, dtype=dtype, device=device)
+
+    def set_value(self, value):
+        self._reset(value)
+
+    def reset_zero(self):
+        self *= 0
+
+    def __getstate__(self):
+        r""" __getstate__ will be called for pickle serialization or deep copy
+        """
+
+        state = {
+            "data": self.numpy(),
+            "device": str(self.device),
+            "dtype": self.dtype,
+            "qdict": self.q_dict,
+        }
+        return state
+
+    def __setstate__(self, state):
+        data = state.pop("data")
+        device = state.pop("device")
+        if self.dmap_callback is not None:
+            assert isinstance(device, str)
+            device = self.dmap_callback(device)
+        dtype = state.pop("dtype")
+        self.q_dict = state.pop("qdict")
+        super().__init__(data, dtype=dtype, device=device)
+
+    def detach(self):
+        r"""
+        Returns a new tensor which is treated as constant during backward gradient calculation,
+        i.e. its gradient is zero.
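+
+        A small sketch of the intended use (values are illustrative):
+
+        .. code-block::
+
+            x = Tensor([1.0, 2.0])
+            y = x.detach()  # same data as ``x``, but constant w.r.t. gradients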
+        """
+        Wrapper = type(self)
+        Tensor = type(self.__wrapped__)
+        return Wrapper(Tensor(self.__wrapped__._data))
+
+
+tensor = Tensor
+
+
+class Dict(collections.abc.MutableMapping):
+    def __init__(self, *args, key=None, **kwargs):
+        self.data = {}
+        if key:
+            self.keyfn = key
+        for i in args:
+            self.update(i)
+        self.update(**kwargs)
+
+    @staticmethod
+    def keyfn(key):  # pylint: disable=method-hidden
+        return key
+
+    def __getitem__(self, key):
+        _, v = self.data[self.keyfn(key)]
+        return v
+
+    def __setitem__(self, key, value):
+        self.data[self.keyfn(key)] = key, value
+
+    def __delitem__(self, key):
+        del self.data[self.keyfn(key)]
+
+    def __iter__(self):
+        for _, (k, _) in self.data.items():
+            yield k
+
+    def __len__(self):
+        return len(self.data)
+
+
+class TensorDict(Dict):  # pylint: disable=too-many-ancestors
+    class keyfn:
+        def __new__(cls, x: Tensor):
+            if not isinstance(x, Tensor):
+                return x
+            return super().__new__(cls)
+
+        def __init__(self, x: Tensor):
+            self._data = x  # do not save id directly to make pickle work
+
+        def __hash__(self):
+            return id(self._data)
+
+        def __eq__(self, other):
+            # pylint: disable=undefined-variable
+            return isinstance(other, __class__) and id(self._data) == id(other._data)
+
+    def __init__(self, *args):
+        super().__init__(*args)
diff --git a/imperative/python/megengine/tensor_nn.py b/imperative/python/megengine/tensor_nn.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c4916fb4a8e14b8bc4bf7f4464202603143b4ef
--- /dev/null
+++ b/imperative/python/megengine/tensor_nn.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from . import Tensor, tensor
+
+
+class Buffer(Tensor):
+    r"""A kind of Tensor with ``requires_grad=False``.
+    """
+
+
+class Parameter(Tensor):
+    r"""A kind of Tensor that is to be considered a module parameter.
+    """
+    requires_grad = True
diff --git a/imperative/python/megengine/test/__init__.py b/imperative/python/megengine/test/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..44ed54c22e810586429e5aed6b0ae41da066e629
--- /dev/null
+++ b/imperative/python/megengine/test/__init__.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import numpy as np
+
+
+def assertTensorClose(
+    v0, v1, *, max_err: float = 1e-6, allow_special_values: bool = False, name=None
+):
+    """
+    :param allow_special_values: whether to allow :attr:`v0` and :attr:`v1` to contain inf and nan values.
+    :param max_err: maximum allowed relative error.
+    """
+    __tracebackhide__ = True  # pylint: disable=unused-variable
+
+    assert (
+        v0.dtype == v1.dtype
+    ), "The two tensors must have the same dtype, but got {} and {}".format(
+        v0.dtype, v1.dtype
+    )
+    v0 = np.ascontiguousarray(v0, dtype=np.float32).copy()
+    v1 = np.ascontiguousarray(v1, dtype=np.float32).copy()
+    if allow_special_values:
+        # check that NaNs match, then remove them
+        v0_nan_mask = np.isnan(v0)
+        if np.any(v0_nan_mask):
+            assert np.array_equiv(v0_nan_mask, np.isnan(v1)), (v0, v1)
+            v0[v0_nan_mask] = 0
+            v1[v0_nan_mask] = 0
+        # check that infs match, then remove them
+        v0_inf_mask = v0 == float("inf")
+        if np.any(v0_inf_mask):
+            assert np.array_equiv(v0_inf_mask, v1 == float("inf")), (v0, v1)
+            v0[v0_inf_mask] = 0
+            v1[v0_inf_mask] = 0
+        # check that -infs match, then remove them
+        v0_inf_mask = v0 == float("-inf")
+        if np.any(v0_inf_mask):
+            assert np.array_equiv(v0_inf_mask, v1 == float("-inf")), (v0, v1)
+            v0[v0_inf_mask] = 0
+            v1[v0_inf_mask] = 0
+    else:
+        assert np.isfinite(v0.sum()) and np.isfinite(v1.sum()), (v0, v1)
+
+    assert v0.shape == v1.shape, "The two tensors must have the same shape ({} vs. {})".format(
+        v0.shape, v1.shape
+    )
+    vdiv = np.max([np.abs(v0), np.abs(v1), np.ones_like(v0)], axis=0)
+    err = np.abs(v0 - v1) / vdiv
+    check = err > max_err
+    if check.sum():
+        idx = tuple(i[0] for i in np.nonzero(check))
+        if name is None:
+            name = "tensor"
+        else:
+            name = "tensor {}".format(name)
+        raise AssertionError(
+            "{} not equal: "
+            "shape={} nonequal_idx={} v0={} v1={} err={}".format(
+                name, v0.shape, idx, v0[idx], v1[idx], err[idx]
+            )
+        )
diff --git a/imperative/python/megengine/utils/__init__.py b/imperative/python/megengine/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1207b5d98cd3578bc39e9ce600a1254a434880c8
--- /dev/null
+++ b/imperative/python/megengine/utils/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
diff --git a/imperative/python/megengine/utils/_timed_func_fork_exec_entry.py b/imperative/python/megengine/utils/_timed_func_fork_exec_entry.py
new file mode 100644
index 0000000000000000000000000000000000000000..b962d365eb4379c1cbcb79b234b7f6ed04f151a1
--- /dev/null
+++ b/imperative/python/megengine/utils/_timed_func_fork_exec_entry.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import argparse
+import os
+import sys
+
+from megengine.core._imperative_rt.utils import _timed_func_exec_cb
+
+try:
+    from setproctitle import setproctitle
+except ImportError:
+    setproctitle = None
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="entry point for fork-exec callback in TimedFuncInvoker;"
+        " this file should not be used directly by normal users."
+ ) + parser.add_argument("user_data") + args = parser.parse_args() + + if setproctitle: + setproctitle("megbrain:timed_func_exec:ppid={}".format(os.getppid())) + _timed_func_exec_cb(args.user_data) + raise SystemError("_timed_func_exec_cb returned") + + +if __name__ == "__main__": + main() diff --git a/imperative/python/megengine/utils/hook.py b/imperative/python/megengine/utils/hook.py new file mode 100644 index 0000000000000000000000000000000000000000..9864a94a1f22b81b7a0e50a19fe4febf54386a17 --- /dev/null +++ b/imperative/python/megengine/utils/hook.py @@ -0,0 +1,23 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import weakref + + +class HookHandler: + hook_num = 0 + + def __init__(self, source_dict, hook): + self.id = HookHandler.hook_num + HookHandler.hook_num += 1 + source_dict[self.id] = hook + self.source_ref = weakref.ref(source_dict) + + def remove(self): + source_dict = self.source_ref() + if source_dict is not None and self.id in source_dict: + del source_dict[self.id] diff --git a/imperative/python/megengine/utils/http_download.py b/imperative/python/megengine/utils/http_download.py new file mode 100644 index 0000000000000000000000000000000000000000..add2a649e815eff774ff5ad43bf01a86b931881c --- /dev/null +++ b/imperative/python/megengine/utils/http_download.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+import hashlib
+import os
+import shutil
+from tempfile import NamedTemporaryFile
+
+import requests
+from tqdm import tqdm
+
+from ..logger import get_logger
+
+logger = get_logger(__name__)
+
+CHUNK_SIZE = 1024
+HTTP_CONNECTION_TIMEOUT = 5
+
+
+class HTTPDownloadError(BaseException):
+    """The class that represents an HTTP request error"""
+
+
+def download_from_url(url: str, dst: str, http_read_timeout=120):
+    """
+    Downloads a file from the given url to ``dst``
+
+    :param url: source URL
+    :param dst: saving path
+    :param http_read_timeout: how many seconds to wait for data before giving up
+    """
+    dst = os.path.expanduser(dst)
+    dst_dir = os.path.dirname(dst)
+
+    resp = requests.get(
+        url, timeout=(HTTP_CONNECTION_TIMEOUT, http_read_timeout), stream=True
+    )
+    if resp.status_code != 200:
+        raise HTTPDownloadError("An error occurred when downloading from {}".format(url))
+
+    md5 = hashlib.md5()
+    total_size = int(resp.headers.get("Content-Length", 0))
+    bar = tqdm(
+        total=total_size, unit="iB", unit_scale=True, ncols=80
+    )  # pylint: disable=blacklisted-name
+    try:
+        with NamedTemporaryFile("w+b", delete=False, suffix=".tmp", dir=dst_dir) as f:
+            logger.info("Download file to temp file %s", f.name)
+            for chunk in resp.iter_content(CHUNK_SIZE):
+                if not chunk:
+                    break
+                bar.update(len(chunk))
+                f.write(chunk)
+                md5.update(chunk)
+            bar.close()
+        shutil.move(f.name, dst)
+    finally:
+        # ensure tmp file is removed
+        if os.path.exists(f.name):
+            os.remove(f.name)
+    return md5.hexdigest()
diff --git a/imperative/python/megengine/utils/max_recursion_limit.py b/imperative/python/megengine/utils/max_recursion_limit.py
new file mode 100644
index 0000000000000000000000000000000000000000..0870b7fa0e48bff3bc53aa98d2206ae81b1d2aaa
--- /dev/null
+++ b/imperative/python/megengine/utils/max_recursion_limit.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import resource
+import sys
+import threading
+
+
+class AlternativeRecursionLimit:
+    r"""A reentrant context manager for setting global recursion limits.
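+
+    Entering the context raises both the Python recursion limit and the process
+    stack size rlimit; uses are reference-counted, so nested ``with`` blocks
+    restore the original limits only when the outermost one exits.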
+ """ + + def __init__(self, new_py_limit): + self.new_py_limit = new_py_limit + self.count = 0 + self.lock = threading.Lock() + + self.orig_py_limit = 0 + self.orig_rlim_stack_soft = 0 + self.orig_rlim_stack_hard = 0 + + def __enter__(self): + with self.lock: + if self.count == 0: + self.orig_py_limit = sys.getrecursionlimit() + ( + self.orig_rlim_stack_soft, + self.orig_rlim_stack_hard, + ) = resource.getrlimit(resource.RLIMIT_STACK) + resource.setrlimit( + resource.RLIMIT_STACK, + (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), + ) + # increase recursion limit + sys.setrecursionlimit(self.new_py_limit) + self.count += 1 + + def __exit__(self, type, value, traceback): + with self.lock: + self.count -= 1 + if self.count == 0: + sys.setrecursionlimit(self.orig_py_limit) + resource.setrlimit( + resource.RLIMIT_STACK, + (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), + ) + + +_max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) + + +def max_recursion_limit(): + r"""Sets recursion limit to the max possible value + """ + return _max_recursion_limit_context_manager diff --git a/imperative/python/megengine/utils/net_stats.py b/imperative/python/megengine/utils/net_stats.py new file mode 100644 index 0000000000000000000000000000000000000000..c8a81d9909ffedc727b99a7be8dd33105879f156 --- /dev/null +++ b/imperative/python/megengine/utils/net_stats.py @@ -0,0 +1,280 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from functools import partial + +import numpy as np +import tabulate + +import megengine as mge +import megengine.core.tensor.dtype as dtype +import megengine.module as m +import megengine.module.qat as qatm +import megengine.module.quantized as qm +from megengine.functional.tensor import zeros + +try: + mge.logger.MegEngineLogFormatter.max_lines = float("inf") +except AttributeError as e: + raise ValueError("set logger max lines failed") + +logger = mge.get_logger(__name__) +logger.setLevel("INFO") + + +CALC_FLOPS = {} + + +def _register_modules(*modules): + def callback(impl): + for module in modules: + CALC_FLOPS[module] = impl + return impl + + return callback + + +@_register_modules( + m.Conv2d, + m.ConvTranspose2d, + m.LocalConv2d, + qm.Conv2d, + qm.ConvRelu2d, + qm.ConvBn2d, + qm.ConvBnRelu2d, + qatm.Conv2d, + qatm.ConvRelu2d, + qatm.ConvBn2d, + qatm.ConvBnRelu2d, +) +def count_convNd(module, input, output): + bias = 1 if module.bias is not None else 0 + group = module.groups + ic = input[0].shape[1] + oc = output[0].shape[1] + goc = oc // group + gic = ic // group + N = output[0].shape[0] + HW = np.prod(output[0].shape[2:]) + # N x Cout x H x W x (Cin x Kw x Kh + bias) + return N * HW * goc * (gic * np.prod(module.kernel_size) + bias) + + +@_register_modules(m.ConvTranspose2d) +def count_deconvNd(module, input, output): + return np.prod(input[0].shape) * output[0].shape[1] * np.prod(module.kernel_size) + + +@_register_modules(m.Linear, qatm.Linear, qm.Linear) +def count_linear(module, input, output): + return np.prod(output[0].shape) * module.in_features + + +# does not need import qat and quantized module since they inherit from float module. 
+hook_modules = (
+    m.Conv2d,
+    m.ConvTranspose2d,
+    m.LocalConv2d,
+    m.BatchNorm2d,
+    m.Linear,
+)
+
+
+def net_stats(model, input_size, bar_length_max=20, log_params=True, log_flops=True):
+    def dict2table(list_of_dict, header):
+        table_data = [header]
+        for d in list_of_dict:
+            row = []
+            for h in header:
+                v = ""
+                if h in d:
+                    v = d[h]
+                row.append(v)
+            table_data.append(row)
+        return table_data
+
+    def sizeof_fmt(num, suffix="B"):
+        for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
+            if abs(num) < 1024.0:
+                return "{:3.3f} {}{}".format(num, unit, suffix)
+            num /= 1024.0
+        sign_str = "-" if num < 0 else ""
+        return "{}{:.1f} {}{}".format(sign_str, abs(num), "Yi", suffix)
+
+    def get_byteswidth(tensor):
+        if dtype.is_quantize(tensor.dtype):
+            return 1
+        # elif dtype.is_bfloat16(tensor.dtype):
+        #     return 2
+        else:
+            return 4
+
+    def print_flops_stats(flops):
+        flops_list = [i["flops_num"] for i in flops]
+        max_flops_num = max(flops_list + [0])
+        # calc total flops and set flops_cum
+        total_flops_num = 0
+        for d in flops:
+            total_flops_num += int(d["flops_num"])
+            d["flops_cum"] = sizeof_fmt(total_flops_num, suffix="OPs")
+
+        for i in flops:
+            f = i["flops_num"]
+            i["flops"] = sizeof_fmt(f, suffix="OPs")
+            r = i["ratio"] = f / total_flops_num
+            i["percentage"] = "{:.2f}%".format(r * 100)
+            bar_length = int(f / max_flops_num * bar_length_max)
+            i["bar"] = "#" * bar_length
+
+        header = [
+            "name",
+            "class_name",
+            "input_shapes",
+            "output_shapes",
+            "flops",
+            "flops_cum",
+            "percentage",
+            "bar",
+        ]
+
+        total_flops_str = sizeof_fmt(total_flops_num, suffix="OPs")
+        total_var_size = sum(sum(s[1] for s in i["output_shapes"]) for i in flops)
+        flops.append(
+            dict(name="total", flops=total_flops_str, output_shapes=total_var_size)
+        )
+
+        logger.info(
+            "flops stats: \n" + tabulate.tabulate(dict2table(flops, header=header))
+        )
+
+        return total_flops_num
+
+    def print_params_stats(params):
+        total_param_dims, total_param_size = 0, 0
+        for d in params:
+            total_param_dims += int(d["param_dim"])
+            total_param_size += int(d["size"])
+            d["size"] = sizeof_fmt(d["size"])
+            d["size_cum"] = sizeof_fmt(total_param_size)
+
+        for d in params:
+            ratio = d["param_dim"] / total_param_dims
+            d["ratio"] = ratio
+            d["percentage"] = "{:.2f}%".format(ratio * 100)
+
+        # construct bar
+        max_ratio = max([d["ratio"] for d in params])
+        for d in params:
+            bar_length = int(d["ratio"] / max_ratio * bar_length_max)
+            d["size_bar"] = "#" * bar_length
+
+        param_size = sizeof_fmt(total_param_size)
+        params.append(dict(name="total", param_dim=total_param_dims, size=param_size,))
+
+        header = [
+            "name",
+            "shape",
+            "mean",
+            "std",
+            "param_dim",
+            "bits",
+            "size",
+            "size_cum",
+            "percentage",
+            "size_bar",
+        ]
+
+        logger.info(
+            "param stats: \n" + tabulate.tabulate(dict2table(params, header=header))
+        )
+
+        return total_param_size
+
+    def net_stats_hook(module, input, output, name=""):
+        class_name = str(module.__class__).split(".")[-1].split("'")[0]
+
+        flops_fun = CALC_FLOPS.get(type(module))
+        if callable(flops_fun):
+            flops_num = flops_fun(module, input, output)
+
+            if not isinstance(output, (list, tuple)):
+                output = [output]
+
+            flops.append(
+                dict(
+                    name=name,
+                    class_name=class_name,
+                    input_shapes=[i.shape for i in input],
+                    output_shapes=[o.shape for o in output],
+                    flops_num=flops_num,
+                    flops_cum=0,
+                )
+            )
+
+        if hasattr(module, "weight") and module.weight is not None:
+            w = module.weight
+            value = w.numpy()
+            param_dim = np.prod(w.shape)
+            param_bytes = get_byteswidth(w)
+            params.append(
+                dict(
+ name=name + "-w", + shape=w.shape, + param_dim=param_dim, + bits=param_bytes * 8, + size=param_dim * param_bytes, + size_cum=0, + mean="{:.2g}".format(value.mean()), + std="{:.2g}".format(value.std()), + ) + ) + + if hasattr(module, "bias") and module.bias is not None: + b = module.bias + value = b.numpy() + param_dim = np.prod(b.shape) + param_bytes = get_byteswidth(b) + params.append( + dict( + name=name + "-b", + shape=b.shape, + param_dim=param_dim, + bits=param_bytes * 8, + size=param_dim * param_bytes, + size_cum=0, + mean="{:.2g}".format(value.mean()), + std="{:.2g}".format(value.std()), + ) + ) + + # multiple inputs to the network + if not isinstance(input_size[0], tuple): + input_size = [input_size] + + params = [] + flops = [] + hooks = [] + + for (name, module) in model.named_modules(): + if isinstance(module, hook_modules): + hooks.append( + module.register_forward_hook(partial(net_stats_hook, name=name)) + ) + + inputs = [zeros(in_size, dtype=np.float32) for in_size in input_size] + model.eval() + model(*inputs) + for h in hooks: + h.remove() + + total_flops, total_params = 0, 0 + if log_params: + total_params = print_params_stats(params) + if log_flops: + total_flops = print_flops_stats(flops) + + return total_params, total_flops diff --git a/imperative/python/megengine/utils/profile_analyze.py b/imperative/python/megengine/utils/profile_analyze.py new file mode 100755 index 0000000000000000000000000000000000000000..8041c0d8fc66a696f4d6fe5011f1fb63bbe280bf --- /dev/null +++ b/imperative/python/megengine/utils/profile_analyze.py @@ -0,0 +1,424 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import argparse +import collections +import json +import re +import textwrap + +import numpy as np +from tabulate import tabulate + +from megengine.utils.profile_analyzer import ( + NonExistNum, + ProfileAnalyzer, + TimeFuncHelper, +) + + +def _tabulate_ml(tab, **kwargs): + """Tabulate profile output with multi-line support.""" + new_tab = [] + new_tab_is_row = [] + for row in tab: + col_lines = [str(i).split("\n") for i in row] + max_nr_line = max(map(len, col_lines)) + new_tab_is_row.append(True) + if max_nr_line > 1: + new_tab_is_row.extend([False] * (max_nr_line - 1)) + for i in col_lines: + if len(i) < max_nr_line: + i.extend([""] * (max_nr_line - len(i))) + new_tab.extend(zip(*col_lines)) + else: + new_tab.append(row) + + assert len(new_tab_is_row) == len(new_tab) + ret = [i + "\n" for i in tabulate(new_tab, **kwargs).split("\n")] + for idx, val in enumerate(new_tab_is_row): + if not val: + ret[idx * 2 + 2] = "" + return "".join(ret)[:-1] + + +def _tabulate_confluence(tab, **kwargs): + """Tabulate profile output.""" + kwargs.pop("tablefmt", None) + s = tabulate(tab, tablefmt="orgtbl", **kwargs) + lines = s.split("\n") + lines[1] = lines[1].replace("+", "|") + return "\n".join(lines) + + +def main(passed_args=None): # pylint: disable=too-many-statements + """Analyses profile info from :mod:`~.utils.profile_analyzer` . + + Run this file with ``--help`` to get more usage. 
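[Editor's note: a usage sketch of net_stats, not part of the patch; it assumes the usual megengine Module API, and the model is hypothetical.]

    import megengine.module as M
    from megengine.utils.net_stats import net_stats

    class TinyNet(M.Module):
        def __init__(self):
            super().__init__()
            self.conv = M.Conv2d(3, 8, 3, padding=1)
            self.fc = M.Linear(8 * 32 * 32, 10)

        def forward(self, x):
            x = self.conv(x)
            return self.fc(x.reshape(x.shape[0], -1))

    # logs a parameter table and a FLOPs table, and returns the totals
    total_params, total_flops = net_stats(TinyNet(), (1, 3, 32, 32))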
+ """ + parser = argparse.ArgumentParser( + description="analyze analyzer result", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("dump") + parser.add_argument( + "-t", + "--top", + type=int, + default=3, + help="number of most time-consuming operators to print", + ) + parser.add_argument( + "--type", action="append", help="filter oprs in the top list by type" + ) + parser.add_argument( + "--aggregate-by", + default=None, + choices=["type"], + help="aggragate profiling result by", + ) + parser.add_argument( + "--opr-name", help="filter oprs in the top list by regex of name" + ) + parser.add_argument( + "--input-dtype", type=str, help="filter oprs in the top list by input dtype" + ) + parser.add_argument( + "--top-end-key", + default="end", + choices=["end", "kern"], + help="how time in top is calculated; end corresponds " + "to total device time, and kern corresponds to only " + "wait time", + ) + parser.add_argument( + "--aggregate", + default=None, + help="aggregate operations", + choices=["max", "min", "sum", "mean"], + ) + parser.add_argument( + "--order-by", + default="time", + help="sort result according to given column; the param can be " + " or +, meaning sorting in descending or " + "ascending order respectively", + ) + parser.add_argument( + "--copy-time", action="store_true", help="show copy time related result" + ) + parser.add_argument( + "--min-time", + type=float, + default=float("-inf"), + help="minimal time of a result to be printed", + ) + parser.add_argument( + "--max-time", + type=float, + default=float("inf"), + help="maximal time of a result to be printed", + ) + parser.add_argument( + "--show-host", action="store_true", help="show host profiling info" + ) + parser.add_argument( + "--dump-only-opr", + action="store_true", + help="only dump operator info as plaintext; useful " + "for diff between two filtered profile results", + ) + parser.add_argument( + "--confluence", + "--wiki", + action="store_true", + help="output confluence-markdown-compatible table", + ) + parser.add_argument( + "--print-only", + choices={"summary", "device", "host"}, + help="print only chosen info", + ) + + args = parser.parse_args(passed_args) + + opr_filters = [] + if args.type: + opr_filters.append(lambda o, a, b: o["type"] in args.type) + if args.opr_name: + opr_filters.append( + lambda o, a, b, r=re.compile(args.opr_name): r.match(o["name"]) + ) + if args.input_dtype: + opr_filters.append( + lambda o, a, b: any( + [i["mem_plan"]["layout"]["dtype"] == args.input_dtype for i in a] + ) + ) + if not opr_filters: + + def opr_filter(o, a, b): # pylint: disable=unused-argument + return True + + else: + + def opr_filter(o, a, b): + return all(i(o, a, b) for i in opr_filters) + + with open(args.dump) as fin: + dump = json.load(fin) + + analyzer = ProfileAnalyzer(dump, opr_filter) + analyzer_tot = ProfileAnalyzer(dump, lambda _, __, ___: True) + + def summary(): + device_end_func = TimeFuncHelper.eval_time_func("device", "end", np.max) + device_kern_func = TimeFuncHelper.eval_time_func("device", "kern", np.max) + host_end_func = TimeFuncHelper.eval_time_func("host", "end", np.max) + + def get_tot_time(func): + rec = analyzer_tot.select(func, aggregate=np.sum) + if not rec: + return "N/A" + rec = rec[0] + return rec.time + + tab = [] + tot_dev_time = get_tot_time(device_end_func) + tot_host_time = get_tot_time(host_end_func) + tab.append(("total device time", tot_dev_time)) + tab.append(("total host time", tot_host_time)) + if args.copy_time: + + def fmt(a, b): + 
+                a = a[0]
+                b = b[0]
+                return "tot={:.4f} avg={:.4f}".format(a.time, b.time)
+
+            tab.append(
+                (
+                    "copy time",
+                    fmt(
+                        analyzer.select(
+                            device_end_func,
+                            lambda opr: opr.opr_info["type"] == "Copy",
+                            aggregate=np.sum,
+                        ),
+                        analyzer.select(
+                            device_end_func,
+                            lambda opr: opr.opr_info["type"] == "Copy",
+                            aggregate=np.mean,
+                        ),
+                    ),
+                )
+            )
+            tab.append(
+                (
+                    "copy wait time",
+                    fmt(
+                        analyzer.select(
+                            device_kern_func,
+                            lambda opr: opr.opr_info["type"] == "Copy",
+                            aggregate=np.sum,
+                        ),
+                        analyzer.select(
+                            device_kern_func,
+                            lambda opr: opr.opr_info["type"] == "Copy",
+                            aggregate=np.mean,
+                        ),
+                    ),
+                )
+            )
+
+        if args.confluence:
+            tab_str = _tabulate_confluence(tab, headers=["name", "value"])
+        else:
+            tab_str = tabulate(tab)
+
+        return tab_str, tot_dev_time, tot_host_time
+
+    def prof_details(prof_type, tot_time):
+        tab = []
+
+        def func(
+            opr,
+            *,
+            f0=TimeFuncHelper.eval_time_func(prof_type, args.top_end_key, np.max)
+        ):
+            t = f0(opr)
+            if t is not None and (t < args.min_time or t > args.max_time):
+                return None
+            return t
+
+        records = analyzer.select(
+            func,
+            aggregate=args.aggregate,
+            aggregate_by=args.aggregate_by,
+            top_k=args.top,
+            sort_by=args.order_by,
+        )
+
+        if args.dump_only_opr:
+            ret = []
+            for i in records:
+                ret.append(" ".join(i.info.values()))
+            return "\n".join(ret)
+
+        def format_shapes(shapes, layouts=None, sep="\n"):
+            if isinstance(shapes, NonExistNum) or shapes is None:
+                return repr(shapes)
+            if layouts is None:
+                layouts = [None] * len(shapes)
+
+            comp = []
+            for i, j in zip(shapes, layouts):
+                i = "{" + ",".join(map(str, i)) + "}"
+                if j:
+                    i += "\n -[" + ",".join(map(str, j)) + "]"
+                comp.append(i)
+            return sep.join(comp)
+
+        def fix_num_and_find_unit(x, base):
+            if isinstance(x, NonExistNum) or (
+                isinstance(x, float) and not np.isfinite(x)
+            ):
+                return x, ""
+            unit = iter(["", "K", "M", "G", "T", "P"])
+            while x >= base:
+                x /= base
+                next(unit)
+            return x, next(unit)
+
+        def get_number_with_unit(num, unit, base, sep="\n"):
+            num, unit_prefix = fix_num_and_find_unit(num, base)
+            if isinstance(unit, list):
+                unit = unit[int(unit_prefix != "")]
+            return ("{:.2f}" + sep + "{}{}").format(num, unit_prefix, unit)
+
+        if args.confluence:
+            rows = []
+            cum_time = 0
+
+            max_time = max([r.time for r in records])
+            max_bandwidth = max([r.bandwidth for r in records])
+            max_flops = max(
+                [r.flops for r in records if not isinstance(r.flops, NonExistNum)]
+            )
+
+            bar_length = 15
+            for idx, record in enumerate(records):
+                cum_time += record.time
+
+                opr_info = [("opr " + k, v) for k, v in record.info.items()]
+
+                row = collections.OrderedDict(
+                    [
+                        ("#", idx),
+                        ("time", "{:.3}".format(record.time)),
+                        ("ratio", "{:.1f}%".format(record.time / tot_time * 100)),
+                        ("time bar", "#" * int(record.time / max_time * bar_length)),
+                        ("cum-time", cum_time),
+                        ("cum-time ratio", cum_time / tot_time),
+                    ]
+                    + opr_info
+                    + [
+                        (
+                            "computation (MFLO)",
+                            "{:.1f}".format(record.computation / 1000 ** 2),
+                        ),
+                        ("MFLOPS", "{:.1f}".format(record.flops / 1000 ** 2)),
+                        (
+                            "MFLOPS-bar",
+                            ""
+                            if isinstance(record.flops, NonExistNum)
+                            else ("#" * int(record.flops / max_flops * bar_length)),
+                        ),
+                        ("memory (MiB)", "{:.1f}".format(record.memory / 1024 ** 2)),
+                        (
+                            "bandwidth (MiB/s)",
+                            "{:.1f}".format(record.bandwidth / 1024 ** 2),
+                        ),
+                        (
+                            "bandwidth bar",
+                            "#" * int(record.bandwidth / max_bandwidth * bar_length),
+                        ),
+                        (
+                            "in_shapes",
+                            format_shapes(
+                                record.in_shapes, record.in_layouts, sep=", "
+                            ),
+                        ),
+                        ("out_shapes",
+                         format_shapes(record.out_shapes, sep=", ")),
+                    ]
+                )
+                rows.append(row)
+            headers = list(rows[0].keys())
+            tab = [[row[i] for i in headers] for row in rows]
+
+            return _tabulate_confluence(tab, headers=headers)
+
+        else:
+            cum_time = 0
+            for idx, record in enumerate(records):
+                cum_time += record.time
+                tab.append(
+                    (
+                        "#{}\n{:.3}\n{:.1f}%".format(
+                            idx, record.time, record.time / tot_time * 100
+                        ),
+                        "{:.3}\n{:.1f}%".format(cum_time, cum_time / tot_time * 100),
+                        "\n".join(
+                            "\n- ".join(textwrap.wrap(str(i), width=30))
+                            for i in record.info.values()
+                        ),
+                        get_number_with_unit(record.computation, "FLO", 1000),
+                        get_number_with_unit(record.flops, "FLOPS", 1000),
+                        get_number_with_unit(record.memory, ["byte", "iB"], 1024),
+                        get_number_with_unit(
+                            record.bandwidth, ["byte/s", "iB/s"], 1024
+                        ),
+                        format_shapes(record.in_shapes, record.in_layouts),
+                        format_shapes(record.out_shapes),
+                    )
+                )
+            return _tabulate_ml(
+                tab,
+                headers=[
+                    "{} self time".format(prof_type),
+                    "cumulative",
+                    "operator info",
+                    "computation",
+                    "FLOPS",
+                    "memory",
+                    "bandwidth",
+                    "in_shapes",
+                    "out_shapes",
+                ],
+                tablefmt="fancy_grid",
+            )
+
+    summary_tab, tot_dev_time, tot_host_time = summary()
+    if args.print_only:
+        print(
+            {
+                "summary": lambda: summary_tab,
+                "device": lambda: prof_details("device", tot_dev_time),
+                "host": lambda: prof_details("host", tot_host_time),
+            }[args.print_only]()
+        )
+    else:
+        print(summary_tab)
+        print()
+        print(prof_details("device", tot_dev_time))
+        if args.show_host:
+            print()
+            print(prof_details("host", tot_host_time))
+
+
+if __name__ == "__main__":
+    main()
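[Editor's note: a usage sketch, not part of the patch; prof.json is a hypothetical profiler dump.]

    # as a CLI:
    #   python -m megengine.utils.profile_analyze prof.json -t 10 \
    #       --aggregate sum --aggregate-by type
    # or programmatically, through the passed_args parameter of main():
    from megengine.utils.profile_analyze import main

    main(["prof.json", "--top", "5", "--order-by", "time"])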
diff --git a/imperative/python/megengine/utils/profile_analyzer.py b/imperative/python/megengine/utils/profile_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..75cc0c0c7c6511dd0e5b232b0acd102e4ffe456d
--- /dev/null
+++ b/imperative/python/megengine/utils/profile_analyzer.py
@@ -0,0 +1,401 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections
+import copy
+import functools
+from typing import Callable, List, Optional, Union
+
+import numpy as np
+
+
+class NonExistNum:
+    """An object that behaves like a number but denotes a missing field; it
+    compares greater than any real number.
+    """
+
+    def __truediv__(self, _):
+        return self
+
+    def __add__(self, rhs):
+        return rhs
+
+    def __radd__(self, lhs):
+        return lhs
+
+    def __neg__(self):
+        return self
+
+    def __gt__(self, rhs):
+        if isinstance(rhs, NonExistNum):
+            return id(self) > id(rhs)
+        return True
+
+    def __ge__(self, rhs):
+        return self > rhs or self == rhs
+
+    def __lt__(self, rhs):
+        if isinstance(rhs, NonExistNum):
+            return id(self) < id(rhs)
+        return False
+
+    def __le__(self, rhs):
+        return self < rhs or self == rhs
+
+    def __eq__(self, rhs):
+        return self is rhs
+
+    def __format__(self, spec):
+        return "N/A"
+
+    def __repr__(self):
+        return "N/A"
+
+
+class OprProfRst:
+    """Opr profiling result dumped from the megengine profiler."""
+
+    opr_info = None
+    """A dict containing operator info: name, id and type."""
+
+    time_dict = None
+    """A mapping from ``"host"`` or ``"device"`` to list of profiling
+    results."""
+
+    footprint = None
+    """A mapping from ``"memory"`` or ``"computation"`` to the actual number
+    of corresponding operations."""
+
+    def __init__(self, entry: dict):
+        """Opr profiling initialization, which sets up name, type and id of opr_info.
+
+        :param entry: profiling json exec_graph items
+        """
+        assert isinstance(entry, dict)
+        self.opr_info = collections.OrderedDict()
+        for key in ["name", "type", "id"]:
+            self.opr_info[key] = entry[key]
+        self.time_dict = collections.defaultdict(list)
+        self.footprint = collections.defaultdict(NonExistNum)
+
+    def update_device_prof_info(self, dev_time: dict):
+        """Updates device profiling info.
+
+        :param dev_time: device time for a single opr,
+            an attribute of the profiling result.
+        """
+        assert isinstance(dev_time, dict)
+        self.time_dict["device"].append(copy.deepcopy(dev_time))
+
+    def update_host_prof_info(self, host_time: dict):
+        """Updates host profiling info.
+
+        :param host_time: host time for a single opr,
+            an attribute of the profiling result.
+        """
+        assert isinstance(host_time, dict)
+        self.time_dict["host"].append(copy.deepcopy(host_time))
+
+    def update_footprint(self, footprint: dict):
+        """Updates opr footprint.
+
+        :param footprint: footprint for a single opr,
+            an attribute of the profiling result.
+        """
+        assert isinstance(footprint, dict)
+        self.footprint.update(footprint)
+
+
+class Record:
+    """A record of the analyzing result."""
+
+    __slots__ = [
+        "time",
+        "info",
+        "computation",
+        "memory",
+        "in_shapes",
+        "in_layouts",
+        "out_shapes",
+        "flops",
+        "bandwidth",
+        "opr_id",
+    ]
+
+    def __init__(self, time: float, info: dict, footprint: dict):
+        """Initializes a single record.
+
+        :param time: opr running time, evaluated by applying the user-provided
+            function to OprProfRst.
+        :param info: opr information, either the original opr information or
+            aggregated information if aggregation is enabled.
+        :param footprint: contains footprint information; for now, we have
+            ``"computation"``, ``"memory"``, ``"in_shapes"``, ``"out_shapes"``.
+ """ + + assert isinstance(footprint, dict) + self.time = time + self.info = collections.OrderedDict(copy.deepcopy(info)) + self.computation = footprint["computation"] or NonExistNum() + self.memory = footprint["memory"] + self.in_shapes = footprint["in_shapes"] + self.in_layouts = footprint.get("in_layouts") + self.out_shapes = footprint["out_shapes"] + self.flops = self.computation / self.time + self.bandwidth = self.memory / self.time + self.opr_id = info.get("id") + if isinstance(self.opr_id, str) and self.opr_id != "N/A": + self.opr_id = int(self.opr_id) + + def get_column_by_name(self, name: str = None): + """extracts column value by its column name + + :param name: column name, None for time. + """ + + if name is None: + name = "time" + return getattr(self, name) + + +class ProfileAnalyzer: + def __init__(self, obj: dict, opr_filter: Callable = lambda opr, inp, out: True): + """Initializes ProfileAnalyzer + + :param obj: dict dumped from json str. + :param opr_filter: function that filter oprs. + """ + self._opr_set = dict() # type: dict + assert isinstance(obj, dict) + varz = obj["graph_exec"]["var"] + for opr_id, entry in obj["graph_exec"]["operator"].items(): + inp = [varz[i] for i in entry["input"]] + out = [varz[i] for i in entry["output"]] + if opr_filter(entry, inp, out): + self._opr_set[opr_id] = OprProfRst(entry) + + for opr_id, entry in obj["profiler"]["device"].items(): + if opr_id not in self._opr_set: + continue + opr = self._opr_set[opr_id] + for _, time in entry.items(): + opr.update_device_prof_info(time) + + for opr_id, entry in obj["profiler"]["host"].items(): + if opr_id not in self._opr_set: + continue + opr = self._opr_set[opr_id] + for _, time in entry.items(): + opr.update_host_prof_info(time) + + for opr_id, entry in obj["profiler"].get("opr_footprint", {}).items(): + if opr_id not in self._opr_set: + continue + opr = self._opr_set[opr_id] + opr.update_footprint(entry) + + def _aggregate( + self, records: List[Record], aop: Union[str, Callable], atype: Optional[str] + ) -> List[Record]: + """Aggregate operation + + :param records: selected records + :param aop: aggregate operation, if aop is str, we would replace it + with associated numpy function wth aop name" + :param atype: the type aggregated by, None for aggregating all into single + record. + """ + if aop is None: + assert atype is None, "must specify aggregate op" + return records + if isinstance(aop, str): + aop = getattr(np, aop) + type2stat = collections.defaultdict(lambda: [[], [], []]) # type: dict + for item in records: + if atype == "type": + d = type2stat[item.info["type"]] + else: + d = type2stat["all"] + d[0].append(item.time) + d[1].append(item.computation) + d[2].append(item.memory) + + rst = [] + for opr_type in type2stat.keys(): + time, computation, memory = type2stat[opr_type] + nr_oprs = len(time) + time_rst = aop(time) + comp_rst = aop(computation) + mem_rst = aop(memory) + + item = Record( + time_rst, + {"type": opr_type, "count": nr_oprs, "id": "N/A"}, + { + "computation": comp_rst, + "memory": mem_rst, + "in_shapes": None, + "out_shapes": None, + }, + ) + rst.append(item) + return rst + + def _sort(self, records: List[Record], sort_by: str) -> List[Record]: + """sort operation + + :param records: the records after aggregate operation. 
+        :param sort_by: keyword for sorting the list.
+        """
+        if sort_by is None:
+            return records
+        if sort_by.startswith("+"):
+            sort_by = sort_by[1:]
+            key = lambda record: record.get_column_by_name(sort_by)
+        else:
+            key = lambda record: -record.get_column_by_name(sort_by)
+        records.sort(key=key)
+        return records
+
+    def select(
+        self,
+        time_func: Callable,
+        opr_filter: Callable = lambda opr: True,
+        aggregate: Callable = None,
+        aggregate_by: str = None,
+        sort_by: str = None,
+        top_k: int = 0,
+    ) -> List[Record]:
+        """Select operation.
+
+        :param time_func: time function provided by the user, applied to every
+            OprProfRst.
+        :param opr_filter: filters operators.
+        :param aggregate: function applied to the lists of records that are
+            aggregated by atype.
+        :param aggregate_by: the type to aggregate by.
+        :param sort_by: keyword for sorting all records.
+        :param top_k: specify the maximum number of records.
+        :return: the records that went through select, aggregate and sort.
+        """
+
+        records = []
+        for opr in self._opr_set.values():
+            if opr_filter(opr):
+                time = time_func(opr)
+                if time is None:
+                    continue
+                item = Record(time, opr.opr_info, opr.footprint)
+                records.append(item)
+
+        records = self._aggregate(records, aggregate, aggregate_by)
+        if not records:
+            return records
+        return self._sort(records, sort_by)[0 : len(records) if top_k == 0 else top_k]
+
+
+class TimeFuncHelper:
+    """Time function helpers for users."""
+
+    @staticmethod
+    def _eval_time(prof_type, end_key, func, opr_prof):
+        """Eval time.
+
+        :type prof_type: str
+        :param prof_type: 'host' or 'device'
+        :type end_key: str
+        :param end_key: 'kern' or 'end'
+        :type func: function
+        :param func: applied to the list of all thread/gpu times.
+        :type opr_prof: `class OprProfRst`
+        :param opr_prof: operator profiling result
+        :rtype: float
+        :return: time
+        """
+
+        if prof_type not in opr_prof.time_dict:
+            return None
+        time = [time[end_key] - time["start"] for time in opr_prof.time_dict[prof_type]]
+        return func(time)
+
+    @staticmethod
+    def eval_time_func(prof_type: str, end_key: str, func: Callable) -> Callable:
+        """Makes a function that evals operator profile time.
+
+        :param prof_type: 'host' or 'device'
+        :param end_key: 'kern' or 'end'
+        :param func: applied to the list of all thread/gpu times.
+        :return: eval time function
+        """
+        return functools.partial(TimeFuncHelper._eval_time, prof_type, end_key, func)
+
+    @staticmethod
+    def _min_start(
+        prof_type, end_key, func, opr_prof
+    ):  # pylint: disable=unused-argument
+        """Eval minimum start time.
+
+        :type prof_type: str
+        :param prof_type: 'host' or 'device'
+        :type end_key: str
+        :param end_key: 'kern' or 'end'
+        :type func: function
+        :param func: applied to the list of all thread/gpu times.
+        :type opr_prof: `class OprProfRst`
+        :param opr_prof: operator profiling result
+        :rtype: float
+        :return: time
+        """
+        if prof_type not in opr_prof.time_dict:
+            return None
+        time = [time["start"] for time in opr_prof.time_dict[prof_type]]
+        return np.min(time)
+
+    @staticmethod
+    def min_start_func(
+        prof_type: str, end_key: str, func: Callable
+    ) -> Callable:  # pylint: disable=unused-argument
+        """Makes a function that evals operator profile min start time.
+
+        :param prof_type: 'host' or 'device'
+        :param end_key: 'kern' or 'end'
+        :param func: applied to the list of all thread/gpu times.
+        :return: eval time function
+        """
+        return functools.partial(TimeFuncHelper._min_start, prof_type, end_key, func)
+
+    @staticmethod
+    def _max_end(prof_type, end_key, func, opr_prof):  # pylint: disable=unused-argument
+        """Eval maximum end time.
+
+        :type prof_type: str
+        :param prof_type: 'host' or 'device'
+        :type end_key: str
+        :param end_key: 'kern' or 'end'
+        :type func: function
+        :param func: applied to the list of all thread/gpu times.
+        :type opr_prof: `class OprProfRst`
+        :param opr_prof: operator profiling result
+        :rtype: float
+        :return: time
+        """
+        if prof_type not in opr_prof.time_dict:
+            return None
+        time = [time["end"] for time in opr_prof.time_dict[prof_type]]
+        return np.max(time)
+
+    @staticmethod
+    def max_end_func(prof_type: str, end_key: str, func: Callable) -> Callable:
+        """Makes a function that evals operator profile max end time.
+
+        :param prof_type: 'host' or 'device'
+        :param end_key: 'kern' or 'end'
+        :param func: applied to the list of all thread/gpu times.
+        :return: eval time function
+        """
+        return functools.partial(TimeFuncHelper._max_end, prof_type, end_key, func)
diff --git a/imperative/python/megengine/utils/profiler.py b/imperative/python/megengine/utils/profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..12dae2d2ff6406f4f26ea3ed9fc5c807973060a0
--- /dev/null
+++ b/imperative/python/megengine/utils/profiler.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from typing import Optional
+
+from ..core._imperative_rt import ProfilerImpl
+from ..core._imperative_rt.imperative import sync
+
+
+class Profiler:
+    def __init__(self, path: Optional[str] = None):
+        self.impl = ProfilerImpl(path)
+
+    def __enter__(self):
+        sync()
+        self.impl.enable()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sync()
+        self.impl.disable()
+
+    def dump(self, path: Optional[str] = None):
+        self.impl.dump(path)
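[Editor's note: a usage sketch, not part of the patch; run_one_step stands for arbitrary code issuing megengine operations, and the output path is hypothetical. The resulting JSON can then be fed to profile_analyze above.]

    from megengine.utils.profiler import Profiler

    p = Profiler("prof.json")   # hypothetical output path
    with p:
        run_one_step()          # any code executing megengine ops (assumed)
    p.dump()                    # re-dump explicitly if needed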
diff --git a/imperative/python/megengine/utils/types.py b/imperative/python/megengine/utils/types.py
new file mode 100644
index 0000000000000000000000000000000000000000..465ca03ce68f02d3944ddb87f5b0d4abde5ef9f9
--- /dev/null
+++ b/imperative/python/megengine/utils/types.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import collections.abc
+import functools
+
+
+def get_ndtuple(value, *, n, allow_zero=True):
+    r"""Converts a scalar or a possibly 1d tuple to an n-d tuple.
+
+    :type allow_zero: bool
+    :param allow_zero: whether zero tuple values are allowed"""
+    if not isinstance(value, collections.abc.Iterable):
+        value = int(value)
+        value = tuple([value for i in range(n)])
+    else:
+        assert len(value) == n, "tuple len is not equal to n: {}".format(value)
+        spatial_axis = map(int, value)
+        value = tuple(spatial_axis)
+    if allow_zero:
+        minv = 0
+    else:
+        minv = 1
+    assert min(value) >= minv, "invalid value: {}".format(value)
+    return value
+
+
+_single = functools.partial(get_ndtuple, n=1, allow_zero=True)
+_pair = functools.partial(get_ndtuple, n=2, allow_zero=True)
+_pair_nonzero = functools.partial(get_ndtuple, n=2, allow_zero=False)
+_triple = functools.partial(get_ndtuple, n=3, allow_zero=True)
+_quadruple = functools.partial(get_ndtuple, n=4, allow_zero=True)
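[Editor's note: a quick sketch of the helpers above, not part of the patch.]

    from megengine.utils.types import _pair, _pair_nonzero, get_ndtuple

    _pair(3)              # -> (3, 3)
    _pair((2, 4))         # -> (2, 4)
    get_ndtuple(5, n=3)   # -> (5, 5, 5)
    _pair_nonzero(0)      # AssertionError: zero rejected when allow_zero=False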
diff --git a/imperative/python/megengine/version.py b/imperative/python/megengine/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4ce488fb676a8fee1611889924a2064f7e100f9
--- /dev/null
+++ b/imperative/python/megengine/version.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+__version__ = "0.8.0"
+
diff --git a/imperative/python/requires-style.txt b/imperative/python/requires-style.txt
new file mode 100644
index 0000000000000000000000000000000000000000..899aac5275ae83a271837340ac99fddea04f6b1e
--- /dev/null
+++ b/imperative/python/requires-style.txt
@@ -0,0 +1,4 @@
+black==19.10b0
+isort==4.3.21
+pylint==2.4.3
+mypy==0.750
diff --git a/imperative/python/requires-test.txt b/imperative/python/requires-test.txt
new file mode 100644
index 0000000000000000000000000000000000000000..545de8af81210195442467c3e0c245ea83dc6e3a
--- /dev/null
+++ b/imperative/python/requires-test.txt
@@ -0,0 +1 @@
+pytest==5.3.0
diff --git a/imperative/python/requires.txt b/imperative/python/requires.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a2d8a55df078d1b40a2d8b46a75035f485c0a263
--- /dev/null
+++ b/imperative/python/requires.txt
@@ -0,0 +1,8 @@
+numpy>=1.18
+multipledispatch==0.6.0
+opencv-python
+pyarrow
+requests
+tabulate
+tqdm
+redispy
diff --git a/imperative/python/setup.py b/imperative/python/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..e583cce4412f89331ef8de62eb1455b46f75a524
--- /dev/null
+++ b/imperative/python/setup.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import os
+import pathlib
+from distutils.file_util import copy_file
+from setuptools import setup, find_packages, Extension
+from setuptools.command.build_ext import build_ext as _build_ext
+
+class PrecompiledExtension(Extension):
+    def __init__(self, name):
+        super().__init__(name, sources=[])
+
+class build_ext(_build_ext):
+
+    def build_extension(self, ext):
+        if not isinstance(ext, PrecompiledExtension):
+            return super().build_extension(ext)
+
+        # copy the prebuilt .so into the build tree (nothing to do in-place,
+        # since the module is already next to the sources)
+        if not self.inplace:
+            fullpath = self.get_ext_fullpath(ext.name)
+            extdir = pathlib.Path(fullpath)
+            extdir.parent.mkdir(parents=True, exist_ok=True)
+
+            modpath = self.get_ext_fullname(ext.name).split('.')
+            modpath[-1] += '.so'
+            modpath = str(pathlib.Path(*modpath).resolve())
+
+            copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run)
+
+package_name = 'MegEngine'
+
+v = {}
+with open("megengine/version.py") as fp:
+    exec(fp.read(), v)
+__version__ = v['__version__']
+
+email = 'megengine@megvii.com'
+local_version = os.environ.get('LOCAL_VERSION')
+if local_version:
+    __version__ = '{}+{}'.format(__version__, local_version)
+
+packages = find_packages(exclude=['test'])
+
+with open('requires.txt') as f:
+    requires = f.read().splitlines()
+with open('requires-style.txt') as f:
+    requires_style = f.read().splitlines()
+with open('requires-test.txt') as f:
+    requires_test = f.read().splitlines()
+
+setup_kwargs = dict(
+    name=package_name,
+    version=__version__,
+    description='Framework for numerical evaluation with '
+    'auto-differentiation',
+    author='Megvii Engine Team',
+    author_email=email,
+    packages=packages,
+    ext_modules=[PrecompiledExtension('megengine.core._imperative_rt')],
+    install_requires=requires,
+    extras_require={
+        'dev': requires_style + requires_test,
+        'ci': requires_test,
+    },
+    cmdclass={'build_ext': build_ext},
+)
+
+setup_kwargs.update(dict(
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Intended Audience :: Developers',
+        'Intended Audience :: Education',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: C++',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Topic :: Scientific/Engineering',
+        'Topic :: Scientific/Engineering :: Mathematics',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        'Topic :: Software Development',
+        'Topic :: Software Development :: Libraries',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+    license='Apache 2.0',
+    keywords='megengine deep learning',
+    data_files=[("megengine", [
+        "../LICENSE",
+        "../ACKNOWLEDGMENTS",
+    ])]
+))
+
+setup(**setup_kwargs)
diff --git a/imperative/python/src/common.cpp b/imperative/python/src/common.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..047a0f80f2e45ed8b3973fcc1d3b4d4bcd0615ab
--- /dev/null
+++ b/imperative/python/src/common.cpp
@@ -0,0 +1,117 @@
+#include "./common.h"
+
+#include <pybind11/operators.h>
+
+#include "megbrain/comp_node.h"
+#include "megbrain/graph.h"
+#include "megbrain/imperative/physical_tensor.h"
+#include "./numpy_dtypes.h"
+#include "./helper.h"
+
+namespace py = pybind11;
+using namespace mgb;
+using namespace imperative;
+
+void init_common(py::module m) {
+    py::class_<CompNode>(m, "CompNode")
+        .def(py::init<>())
+        .def(py::init(py::overload_cast<const std::string&>(&CompNode::load)))
+        .def("__str__", &CompNode::to_string)
.def_static("_sync_all", &CompNode::sync_all) + .def(py::self == py::self) + .def_static("_get_device_count", &CompNode::get_device_count, + "Get total number of specific devices on this system") + .def(py::pickle( + [](const CompNode& cn) { + return py::str(cn.to_string_logical()); + }, + [](py::str cn) { + return CompNode::load(cn); + })); + + py::implicitly_convertible(); + + py::class_(m, "DeviceTensorND") + .def(py::init()) + .def_property_readonly("shape", py::overload_cast<>(&DeviceTensorND::shape, py::const_)) + .def_property_readonly("dtype", py::overload_cast<>(&DeviceTensorND::dtype, py::const_)) + .def_property_readonly("comp_node", py::overload_cast<>(&DeviceTensorND::comp_node, py::const_)) + .def("numpy", [](const DeviceTensorND& self) { + HostTensorND hv; + hv.copy_from(self).sync(); + return py::handle(npy::ndarray_from_tensor(hv, npy::ShareType::TRY_SHARE)); + }); + + py::class_(m, "OperatorNodeConfig") + .def(py::init()) + .def_property("name", + [](const OperatorNodeConfig& config) -> py::object { + auto name = config.name(); + if (name.valid()) { + return py::str(name.val()); + } else { + return py::none(); + } + }, + [](OperatorNodeConfig& config, std::string name){ + config.name(std::move(name)); + }) + .def_property("dtype", + [](const OperatorNodeConfig& config) { + return config.output_dtype(); + }, + [](OperatorNodeConfig& config, DType dtype) { + config.output_dtype(dtype); + }) + .def_property("comp_node_arr", + [](const OperatorNodeConfig& config) -> py::tuple { + auto arr = config.comp_node(); + std::vector tmp(arr.begin(), arr.end()); + return py::cast(tmp); + }, + [](OperatorNodeConfig& config, std::vector cns) { + config.comp_node_arr({cns.begin(), cns.end()}); + }) + .def_property("comp_node", + [](const OperatorNodeConfig& config) { + auto arr = config.comp_node(); + if (arr.size() != 1) { + throw py::value_error("invalid number of comp_node"); + } + return arr[0]; + }, + [](OperatorNodeConfig& config, CompNode cn) { + OperatorNodeConfig::CompNodeArray arr{cn}; + config.comp_node_arr(arr); + }); + + py::class_(m, "TensorAttr") + .def(py::init()) + .def(py::init([](const TensorShape& shape, const DType& dtype, const CompNode& comp_node){ + return LogicalTensorDesc{TensorLayout{shape, dtype}, comp_node}; + })) + .def_property("shape", + [](const LogicalTensorDesc& desc) { + return static_cast(desc.layout); + }, + [](LogicalTensorDesc& desc, TensorShape shape) { + }) + .def_property("dtype", + [](const LogicalTensorDesc& desc) { + return desc.layout.dtype; + }, + [](LogicalTensorDesc& desc, DType dtype) { + desc.layout.dtype = dtype; + }) + .def_readwrite("comp_node", &LogicalTensorDesc::comp_node); + + py::enum_(m, "DeviceType") + .value("UNSPEC", CompNode::DeviceType::UNSPEC) + .value("CUDA", CompNode::DeviceType::CUDA) + .value("CPU", CompNode::DeviceType::CPU) + .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD) + .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID); + + init_npy_num_bfloat16(m); + init_npy_num_intbx(m); +} diff --git a/imperative/python/src/common.h b/imperative/python/src/common.h new file mode 100644 index 0000000000000000000000000000000000000000..582019b817ce137e1c30d008bb1dcd37977322be --- /dev/null +++ b/imperative/python/src/common.h @@ -0,0 +1,5 @@ +#pragma once + +#include "./helper.h" + +void init_common(pybind11::module m); diff --git a/imperative/python/src/graph_rt.cpp b/imperative/python/src/graph_rt.cpp new file mode 100644 index 
index 0000000000000000000000000000000000000000..022bbf8c90e7d64442747454a989be9e4b94e706
--- /dev/null
+++ b/imperative/python/src/graph_rt.cpp
@@ -0,0 +1,191 @@
+#include "./graph_rt.h"
+
+#include "megbrain/imperative/opr_utility.h"
+#include "megbrain/opr/basic_arith.h"
+#include "megbrain/imperative.h"
+#include "./helper.h"
+
+namespace py = pybind11;
+
+using namespace mgb;
+using namespace imperative;
+
+#define DEF_READWRITE(name) .def_readwrite(#name, &CURRENT_CLASS::name)
+
+template <typename T>
+auto def_rendezvous(py::object m, const char* name) {
+    return py::class_<Rendezvous<T>, std::shared_ptr<Rendezvous<T>>>(m, name)
+        .def(py::init([](){return std::make_shared<Rendezvous<T>>();}))
+        .def("set", [](Rendezvous<T>& r, T v) {r.set(std::move(v));})
+        .def("get", [](Rendezvous<T>& r) {return r.get();}, py::call_guard<py::gil_scoped_release>())
+        .def("reset", &Rendezvous<T>::reset);
+}
+
+using TensorAttr = LogicalTensorDesc;
+
+void init_graph_rt(py::module m) {
+    def_rendezvous<DeviceTensorND>(m, "DeviceTensorNDRendezvous");
+
+    def_rendezvous<TensorAttr>(m, "TensorAttrRendezvous");
+
+    py::class_<cg::VarNode, GraphNodePtr<cg::VarNode>>(m, "VarNode")
+        .def_property_readonly("owner", [](cg::VarNode* v) {return v->owner_opr();})
+        .def_property_readonly("graph", [](cg::VarNode* v) {return v->owner_graph();})
+        .def_property_readonly("dtype", [](cg::VarNode* v) {return v->dtype();})
+        .def_property_readonly("comp_node", [](cg::VarNode* v) {return v->comp_node();});
+
+    py::class_<cg::OperatorNodeBase, GraphNodePtr<cg::OperatorNodeBase>>(m, "OperatorNode")
+        .def_property_readonly("graph", [](cg::OperatorNodeBase* opr) {return opr->owner_graph();})
+        .def_property_readonly("inputs", [](cg::OperatorNodeBase* opr) {
+                return to_tuple(opr->input());
+            })
+        .def_property_readonly("outputs", [](cg::OperatorNodeBase* opr) {
+                return to_tuple(opr->output());
+            });
+
+    py::class_<cg::AsyncExecutable>(m, "AsyncExecutable")
+        .def("execute", &cg::AsyncExecutable::execute, py::call_guard<py::gil_scoped_release>())
+        .def("wait", &cg::AsyncExecutable::wait, py::call_guard<py::gil_scoped_release>());
+
+    auto PyComputingGraph = py::class_<cg::ComputingGraph, std::shared_ptr<cg::ComputingGraph>>(m, "ComputingGraph")
+        .def(py::init(py::overload_cast<>(&cg::ComputingGraph::make)))
+        .def("compile", [](cg::ComputingGraph& graph, const std::vector<cg::VarNode*>& dest_vars) {
+                mgb_assert(!dest_vars.empty());
+                cg::ComputingGraph::OutputSpec spec;
+                for (auto v : dest_vars) {
+                    spec.emplace_back(v, nullptr);
+                }
+                return graph.compile(spec);
+            })
+        .def_property_readonly("options", py::overload_cast<>(&cg::ComputingGraph::options));
+
+#define CURRENT_CLASS cg::ComputingGraph::Options
+
+    auto PyComputingGraphOptions = py::class_<cg::ComputingGraph::Options>(PyComputingGraph, "Options")
+        // DEF_READWRITE(opr_attribute)
+        DEF_READWRITE(seq_opt)
+        DEF_READWRITE(graph_opt)
+        DEF_READWRITE(graph_opt_level)
+        DEF_READWRITE(log_level)
+        DEF_READWRITE(async_exec_level)
+        DEF_READWRITE(force_dynamic_alloc)
+        DEF_READWRITE(var_sanity_check_first_run)
+        DEF_READWRITE(allocate_static_mem_after_graph_compile)
+        DEF_READWRITE(fake_next_exec)
+        DEF_READWRITE(enable_sublinear_memory_opt)
+        DEF_READWRITE(no_profiling_on_shape_change)
+        DEF_READWRITE(enable_var_mem_defragment)
+        DEF_READWRITE(enable_grad_var_static_reshape)
+        DEF_READWRITE(enable_memory_swap)
+        DEF_READWRITE(comp_node_seq_record_level)
+        // DEF_READWRITE(eager_evaluation)
+        // DEF_READWRITE(imperative_proxy_graph)
+        // DEF_READWRITE(extra_vardeps)
+        // DEF_READWRITE(user_data)
+        ;
+
+#undef CURRENT_CLASS
+#define CURRENT_CLASS cg::ComputingGraph::Options::SeqOpt
+
+    py::class_<cg::ComputingGraph::Options::SeqOpt>(PyComputingGraphOptions, "SeqOpt")
+        DEF_READWRITE(enable_mem_plan_opt)
+        DEF_READWRITE(enable_mem_reuse_alloc)
+        DEF_READWRITE(enable_seq_comp_node_opt);
+
+#undef CURRENT_CLASS
+#define CURRENT_CLASS cg::ComputingGraph::Options::GraphOpt
+
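+    // Editor's note: CURRENT_CLASS together with DEF_READWRITE is a small
+    // macro idiom: DEF_READWRITE(x) expands to
+    // .def_readwrite("x", &CURRENT_CLASS::x), so redefining CURRENT_CLASS
+    // lets the same one-line list be reused for Options, SeqOpt and GraphOpt
+    // without repeating the class name each time.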
+    py::class_<cg::ComputingGraph::Options::GraphOpt>(PyComputingGraphOptions, "GraphOpt")
+        DEF_READWRITE(jit)
+        DEF_READWRITE(tensorrt);
+
+#undef CURRENT_CLASS
+
+    auto common = rel_import("common", m, 1);
+
+    common.def("invoke_op", [](const OpDef& def, const std::vector<cg::VarNode*> inputs, cg::ComputingGraph* graph) {
+            cg::VarNodeArray vinputs(inputs.begin(), inputs.end());
+            auto opr = OpDef::apply_on_var_node(def, vinputs);
+            auto outputs = opr->output();
+            return to_tuple(outputs);
+        },
+        py::arg(), py::arg(), py::arg("graph") = py::none());
+
+    auto input_callback = [](auto callback,
+                             const CompNode& comp_node,
+                             const DType& dtype,
+                             const std::vector<cg::VarNode*>& inputs,
+                             cg::ComputingGraph* graph) {
+        if (!graph) {
+            graph = inputs[0]->owner_graph();
+        }
+        SymbolVarArray sinputs;
+        for (auto i : inputs) {
+            sinputs.emplace_back(i);
+        }
+        static_assert(!std::is_reference<decltype(callback)>::value);
+        auto soutputs = opr::InputCallback::make(*graph, std::move(callback), comp_node, dtype, sinputs);
+        std::vector<cg::VarNode*> outputs;
+        outputs.reserve(soutputs.size());
+        for (auto i : soutputs) {
+            outputs.push_back(i.node());
+        }
+        return outputs;
+    };
+
+    m.def("input_callback", [input_callback](std::function<DeviceTensorND()> callback,
+                                             const CompNode& comp_node,
+                                             const DType& dtype,
+                                             const std::vector<cg::VarNode*>& inputs,
+                                             cg::ComputingGraph* graph) {
+            return input_callback([f=std::move(callback)](){py::gil_scoped_acquire _; return f();}, comp_node, dtype, inputs, graph);
+        },
+        py::arg(), py::arg(), py::arg(), py::arg() = py::tuple(), py::arg("graph") = py::none());
+
+    m.def("input_callback", [input_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p,
+                                             const CompNode& comp_node,
+                                             const DType& dtype,
+                                             const std::vector<cg::VarNode*>& inputs,
+                                             cg::ComputingGraph* graph) {
+            auto f = [p]() -> DeviceTensorND {
+                return p->get();
+            };
+            return input_callback(std::move(f), comp_node, dtype, inputs, graph);
+        },
+        py::arg(), py::arg(), py::arg(), py::arg() = py::tuple(), py::arg("graph") = py::none());
+
+    auto output_callback = [](auto callback, const std::vector<cg::VarNode*>& inputs, bool borrow = false) {
+        SymbolVarArray sinputs;
+        for (auto i : inputs) {
+            sinputs.emplace_back(i);
+        }
+        static_assert(!std::is_reference<decltype(callback)>::value);
+        opr::OutputCallback::Param param{std::move(callback), borrow};
+        auto output = opr::OutputCallback::make(std::move(param), sinputs);
+        return output.node();
+    };
+
+    m.def("output_callback", [output_callback](std::function<void(DeviceTensorND)> callback, std::vector<cg::VarNode*> inputs) {
+        auto f = [f=std::move(callback)](DeviceTensorND dv) {
+            auto task = [f=std::move(f), dv=std::move(dv)]() {
+                f(dv);
+            };
+            py_task_q.add_task(std::move(task));
+        };
+        return output_callback(std::move(f), std::move(inputs));
+    });
+
+    m.def("output_callback", [output_callback](std::shared_ptr<Rendezvous<DeviceTensorND>> p, std::vector<cg::VarNode*> inputs) {
+        auto f = [p](DeviceTensorND dv) {
+            p->set(std::move(dv));
+        };
+        return output_callback(std::move(f), std::move(inputs));
+    });
+
+    m.def("attr_output_callback", [output_callback](std::shared_ptr<Rendezvous<TensorAttr>> p, std::vector<cg::VarNode*> inputs) {
+        auto f = [p](DeviceTensorND dv) {
+            p->set(TensorAttr{TensorLayout{dv.shape(), dv.dtype()}, dv.comp_node()});
+        };
+        return output_callback(std::move(f), std::move(inputs), true);
+    });
+}
diff --git a/imperative/python/src/graph_rt.h b/imperative/python/src/graph_rt.h
new file mode 100644
index 0000000000000000000000000000000000000000..fbc127c45dc071d2be0f75c5cfeb855b3323d9a6
--- /dev/null
+++ b/imperative/python/src/graph_rt.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include "./helper.h"
+
+#include <mutex>
+#include <future>
+#include <memory>
+
+#include "megbrain/graph.h"
+
+template <typename T>
+class GraphNodePtr {
+    std::shared_ptr<mgb::cg::ComputingGraph> m_graph;
+    T* m_node;
+public:
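+    // Editor's note: this holder pins the owning ComputingGraph via
+    // shared_from_this() for as long as Python holds the node, so VarNode /
+    // OperatorNode objects handed to Python cannot dangle.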
+    GraphNodePtr(T* node) :
+            m_graph(node ? node->owner_graph()->shared_from_this() : nullptr),
+            m_node(node) {}
+    T* operator->() {return m_node;}
+    T& operator*() {return *m_node;}
+    operator bool() {return m_node;}
+    T* get() {return m_node;}
+};
+
+PYBIND11_DECLARE_HOLDER_TYPE(T, GraphNodePtr<T>, true);
+
+template <typename R>
+class Rendezvous {
+    std::mutex m_lock;
+    int m_read_ahead = 0;
+    std::promise<R> m_promise;
+public:
+    Rendezvous() = default;
+    Rendezvous(const Rendezvous& rhs) = delete;
+    Rendezvous(Rendezvous&& rhs) = default;
+    Rendezvous& operator=(const Rendezvous& rhs) = delete;
+    Rendezvous& operator=(Rendezvous&& rhs) {
+        MGB_LOCK_GUARD(m_lock);
+        m_read_ahead = rhs.m_read_ahead;
+        m_promise = std::move(rhs.m_promise);
+        return *this;
+    }
+
+    R get() {
+        std::future<R> f;
+        {
+            MGB_LOCK_GUARD(m_lock);
+            mgb_assert(m_read_ahead <= 0);
+            mgb_assert(m_read_ahead >= -1);
+            f = m_promise.get_future();
+            if (m_read_ahead == -1) {
+                m_promise = {};
+            }
+            ++m_read_ahead;
+        }
+        return f.get();
+    }
+
+    template <typename T>
+    void set(T&& value) {
+        MGB_LOCK_GUARD(m_lock);
+        mgb_assert(m_read_ahead >= 0);
+        mgb_assert(m_read_ahead <= 1);
+        m_promise.set_value(std::forward<T>(value));
+        if (m_read_ahead == 1) {
+            m_promise = {};
+        }
+        --m_read_ahead;
+    }
+
+    void reset() {
+        MGB_LOCK_GUARD(m_lock);
+        m_promise = {};
+        m_read_ahead = 0;
+    }
+};
+
+void init_graph_rt(pybind11::module m);
diff --git a/imperative/python/src/helper.cpp b/imperative/python/src/helper.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a1b8b27759e7b3873d60223023fa84b1049dd687
--- /dev/null
+++ b/imperative/python/src/helper.cpp
@@ -0,0 +1,705 @@
+#include "./helper.h"
+
+#include <Python.h>
+
+#include "megbrain/graph/exc_extra_info.h"
+#include "megbrain/graph/event.h"
+#include "megbrain/graph/cg.h"
+#include "megbrain/tensor.h"
+#include "megbrain/utils/mempool.h"
+#include "./numpy_dtypes.h"
+
+/*
+ * demangle typeid, see
+ * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
+ */
+#ifdef __GNUG__
+#include <cstdlib>
+#include <memory>
+#include <cxxabi.h>
+
+namespace py = pybind11;
+
+PyTaskDipatcher py_task_q = {};
+
+py::module submodule(py::module parent, const char* name, const char* doc) {
+    auto m = parent.def_submodule(name, doc);
+    m.attr("__package__") = parent.attr("__name__");
+    m.attr("__builtins__") = py::module::import("builtins");
+    return m;
+}
+
+py::module rel_import(py::str name, py::module m, int level) {
+    py::object import = py::module::import("builtins").attr("__import__");
+    return import(name, m.attr("__dict__"), py::arg("level")=level);
+}
+
+namespace {
+
+std::string demangle_typeid(const char* name) {
+
+    int status = -4; // some arbitrary value to eliminate the compiler warning
+
+    // enable c++11 by passing the flag -std=c++11 to g++
+    std::unique_ptr<char, void(*)(void*)> res {
+        abi::__cxa_demangle(name, nullptr, nullptr, &status),
+        std::free
+    };
+
+    return (status==0) ?
+            res.get() : name ;
+}
+}
+#else
+
+namespace {
+// does nothing if not g++
+std::string demangle_typeid(const char* name) {
+    return name;
+}
+}
+
+#endif
+
+using namespace mgb;
+using namespace cg;
+
+namespace {
+
+    std::string repr_pyobj(PyObject *obj) {
+        if (!obj)
+            return "<null PyObject>";
+        PYTHON_GIL;
+        auto str = PyObject_Repr(obj);
+        if (!str)
+            return ssprintf("<PyObject at %p (repr failed)>", obj);
+        std::string ret{PyUnicode_AsUTF8(str)};
+        Py_DECREF(str);
+        return ret;
+    }
+
+    template <typename T>
+    std::string typeid_name(const T &t) {
+        return demangle_typeid(typeid(t).name());
+    }
+
+} // anonymous namespace
+
+/* ============== PyExceptionForward ============== */
+
+PyExceptionForward::~PyExceptionForward() {
+    PYTHON_GIL;
+    PyObjRefKeeper::deleter(m_type);
+    PyObjRefKeeper::deleter(m_value);
+    PyObjRefKeeper::deleter(m_traceback);
+}
+
+void PyExceptionForward::restore() {
+    PyErr_Restore(m_type, m_value, m_traceback);
+    m_type = m_value = m_traceback = nullptr;
+}
+
+void PyExceptionForward::throw_() {
+    PyObject *etype, *obj, *trace;
+    PyErr_Fetch(&etype, &obj, &trace);
+    PyErr_NormalizeException(&etype, &obj, &trace);
+
+    std::string msg{"python exception"};
+    bool succ = false;
+    if (etype && obj && trace) {
+        auto run = [&]() {
+#define DEF(name, expr) \
+    PyObjRefKeeper name{expr}; \
+    if (!name.get()) \
+        return
+            DEF(mod, PyImport_ImportModule("traceback"));
+            DEF(result, PyObject_CallMethod(mod.get(), "format_exception",
+                                            "(OOO)", etype, obj, trace));
+            if (!PyList_Check(result.get()))
+                return;
+            auto size = PyList_Size(result.get());
+            msg.append(":\n");
+            for (Py_ssize_t i = 0; i < size; ++i) {
+                msg.append(" ");
+                msg.append(PyUnicode_AsUTF8(PyList_GetItem(result.get(), i)));
+            }
+            msg.pop_back(); // remove last \n
+            succ = true;
+#undef DEF
+        };
+        run();
+    }
+    if (!succ) {
+        PyObject* obj_str_py;
+        if (obj && (obj_str_py = PyObject_Repr(obj))) {
+            msg.append(" with message ");
+            msg.append(PyUnicode_AsUTF8(obj_str_py));
+            Py_DECREF(obj_str_py);
+        } else {
+            msg.append(" with unknown message");
+        }
+    }
+    // throwing exception may cause abort due to unknown reasons; so we first
+    // log the message
+    mgb_log_error("caught exception from python callback: %s", msg.c_str());
+    fflush(stdout);
+    fflush(stderr);
+    throw PyExceptionForward{etype, obj, trace, msg};
+}
+
+/* ============== namespace npy ============== */
+
+namespace {
+
+int to_mgb_supported_dtype_raw(int dtype) {
+    if (dtype == NPY_INT64)
+        return NPY_INT32;
+    if (dtype == NPY_FLOAT64)
+        return NPY_FLOAT32;
+    return dtype;
+}
+
+#define FOREACH_NPY_DTYPE_PAIR(cb) \
+    cb(Uint8, NPY_UINT8) \
+    cb(Int8, NPY_INT8) \
+    cb(Int16, NPY_INT16) \
+    cb(Int32, NPY_INT32) \
+    cb(Float16, NPY_FLOAT16) \
+    cb(Float32, NPY_FLOAT32) \
+    cb(Bool, NPY_BOOL)
+
+#define FOREACH_NPY_MGB_DTYPE_PAIR(cb) \
+    FOREACH_NPY_DTYPE_PAIR(cb) \
+    FOREACH_MGB_DTYPE_PAIR(cb)
+
+
+
+//! convert megbrain dtype to numpy dtype
+int dtype_mgb2np_raw(DType dtype) {
+    mgb_assert(dtype.valid(), "attempt to convert from invalid dtype");
+    switch (dtype.enumv()) {
+#define cb(_m, _n) \
+    case DTypeEnum::_m: \
+        return _n;
+    FOREACH_NPY_MGB_DTYPE_PAIR(cb)
+#undef cb
+        default:
+            break;
+    }
+    throw ConversionError(ssprintf(
+            "can not convert dtype %s to numpy dtype", dtype.name()));
+}
+
+struct PyArrayDescrDeleter {
+    void operator()(PyArray_Descr* obj) {
+        Py_XDECREF(obj);
+    }
+};
+
+//! Convert MegBrain DType to NumPy DType descriptor; the caller receives a new
+//! reference to the descriptor.
+std::unique_ptr<PyArray_Descr, PyArrayDescrDeleter> dtype_mgb2np_descr(
+        DType dtype) {
+    PYTHON_GIL;
+    mgb_assert(dtype.valid(), "attempt to convert from invalid dtype");
+    auto build_mgb_dtype_dict =
+            [](const char* name,
+               const std::vector<std::pair<const char*, PyObject*>>& data) {
+                PyObject* metadata = PyDict_New();
+                PyObject* mgb_dtype_metadata = PyDict_New();
+                PyDict_SetItemString(mgb_dtype_metadata, "name",
+                                     PyUnicode_FromString(name));
+                for (const auto& d : data) {
+                    PyDict_SetItemString(mgb_dtype_metadata, d.first, d.second);
+                }
+                PyDict_SetItemString(metadata, "mgb_dtype", mgb_dtype_metadata);
+                return metadata;
+            };
+    if (dtype.has_param()) {
+        PyArray_Descr* type_descr;
+        switch (dtype.enumv()) {
+            case DTypeEnum::Quantized4Asymm: {
+                auto& param = dtype.param<dtype::Quantized4Asymm>();
+                type_descr = PyArray_DescrNewFromType(NPY_UINT8);
+                type_descr->metadata = build_mgb_dtype_dict(
+                        DTypeTrait<dtype::Quantized4Asymm>::name,
+                        {{"scale", PyFloat_FromDouble(param.scale)},
+                         {"zero_point", PyLong_FromLong(param.zero_point)}});
+                break;
+            }
+            case DTypeEnum::QuantizedS4: {
+                auto& param = dtype.param<dtype::QuantizedS4>();
+                type_descr = PyArray_DescrNewFromType(NPY_INT8);
+                type_descr->metadata = build_mgb_dtype_dict(
+                        DTypeTrait<dtype::QuantizedS4>::name,
+                        {{"scale", PyFloat_FromDouble(param.scale)}});
+                break;
+            }
+            case DTypeEnum::Quantized8Asymm: {
+                auto& param = dtype.param<dtype::Quantized8Asymm>();
+                type_descr = PyArray_DescrNewFromType(NPY_UINT8);
+                type_descr->metadata = build_mgb_dtype_dict(
+                        DTypeTrait<dtype::Quantized8Asymm>::name,
+                        {{"scale", PyFloat_FromDouble(param.scale)},
+                         {"zero_point", PyLong_FromLong(param.zero_point)}});
+                break;
+            }
+            case DTypeEnum::QuantizedS8: {
+                auto& param = dtype.param<dtype::QuantizedS8>();
+                type_descr = PyArray_DescrNewFromType(NPY_INT8);
+                type_descr->metadata = build_mgb_dtype_dict(
+                        DTypeTrait<dtype::QuantizedS8>::name,
+                        {{"scale", PyFloat_FromDouble(param.scale)}});
+                break;
+            }
+            case DTypeEnum::QuantizedS32: {
+                auto& param = dtype.param<dtype::QuantizedS32>();
+                type_descr = PyArray_DescrNewFromType(NPY_INT32);
+                type_descr->metadata = build_mgb_dtype_dict(
+                        DTypeTrait<dtype::QuantizedS32>::name,
+                        {{"scale", PyFloat_FromDouble(param.scale)}});
+                break;
+            }
+            default:
+                mgb_throw(ConversionError, "unhandled parameterized DType %s",
+                          dtype.name());
+        }
+        return std::unique_ptr<PyArray_Descr, PyArrayDescrDeleter>(type_descr);
+    }
+    PyArray_Descr* basic_descr = PyArray_DescrFromType(dtype_mgb2np_raw(dtype));
+    mgb_assert(basic_descr != nullptr,
+               "failed to convert expected dtype to numpy type descriptor");
+    return std::unique_ptr<PyArray_Descr, PyArrayDescrDeleter>(basic_descr);
+}
+
+DType dtype_np2mgb_raw(int npt) {
+    switch (npt) {
+#define cb(_m, _n) \
+    case _n: \
+        return dtype::_m();
+    FOREACH_NPY_DTYPE_PAIR(cb)
+#undef cb
+    }
+#define cb(_m, _n) \
+    if (_n == npt) return dtype::_m();
+    FOREACH_MGB_DTYPE_PAIR(cb)
+#undef cb
+
+    PYTHON_GIL;
+    std::string msg;
+    auto py_obj = PyArray_TypeObjectFromType(npt);
+    if (!py_obj) {
+        msg = ssprintf("unknown numpy dtype enum %d", npt);
+    } else {
+        msg = ssprintf("unsupported numpy dtype %s",
+                       repr_pyobj(py_obj).c_str());
+    }
+    Py_XDECREF(py_obj);
+    throw ConversionError(msg);
+}
+
+DType dtype_np2mgb_descr(PyArray_Descr* descr) {
+    PYTHON_GIL;
+    auto handle_parameterized_dtype = [](PyObject* metadata) -> DType {
+        mgb_assert(PyDict_Check(metadata),
+                   "Invalid parameterized DType metadata: should be a dict");
+        PyObject* dtype_name_py = PyDict_GetItemString(metadata, "name");
+        mgb_assert(
+                PyUnicode_Check(dtype_name_py),
+                "Invalid parameterized DType metadata: name should be a str");
+        std::string dtype_name(PyUnicode_AsUTF8(dtype_name_py));
+        if (dtype_name == "Quantized8Asymm") {
+            PyObject* scale_py = PyDict_GetItemString(metadata, "scale");
+            PyObject* zero_point_py =
"zero_point"); + mgb_assert(scale_py && zero_point_py, + "Invalid Quantized8Asymm metadata: missing scale or " + "zero_point."); + mgb_assert( + PyFloat_Check(scale_py), + "Invalid Quantized8Asymm metadata: scale should be float"); + mgb_assert(PyLong_Check(zero_point_py), + "Invalid Quantized8Asymm metadata: zero_point should be " + "integer"); + auto zero_point = PyLong_AS_LONG(zero_point_py); + mgb_assert(zero_point >= 0 && zero_point < 256, + "Invalid Quantized8Asymm metadata: zero_point should be " + "in [0, 256)"); + return dtype::Quantized8Asymm( + static_cast(PyFloat_AS_DOUBLE(scale_py)), + static_cast(zero_point)); + } + if (dtype_name == "Quantized4Asymm") { + PyObject* scale_py = PyDict_GetItemString(metadata, "scale"); + PyObject* zero_point_py = + PyDict_GetItemString(metadata, "zero_point"); + mgb_assert(scale_py && zero_point_py, + "Invalid Quantized4Asymm metadata: missing scale or " + "zero_point."); + mgb_assert( + PyFloat_Check(scale_py), + "Invalid Quantized4Asymm metadata: scale should be float"); + mgb_assert(PyLong_Check(zero_point_py), + "Invalid Quantized4Asymm metadata: zero_point should be " + "integer"); + auto zero_point = PyLong_AS_LONG(zero_point_py); + mgb_assert(zero_point >= 0 && zero_point < 15, + "Invalid Quantized4Asymm metadata: zero_point should be " + "in [0, 15)"); + return dtype::Quantized4Asymm( + static_cast(PyFloat_AS_DOUBLE(scale_py)), + static_cast(zero_point)); + } + if (dtype_name == "QuantizedS32" || dtype_name == "QuantizedS8" || + dtype_name == "QuantizedS4") { + PyObject* scale_py = PyDict_GetItemString(metadata, "scale"); + mgb_assert(scale_py, "Invalid metadata: missing scale"); + mgb_assert(PyFloat_Check(scale_py), + "Invalid metadata: scale should be float"); + float scale = static_cast(PyFloat_AS_DOUBLE(scale_py)); + if (dtype_name == "QuantizedS32") { + return dtype::QuantizedS32(scale); + } else if (dtype_name == "QuantizedS8"){ + return dtype::QuantizedS8(scale); + } else { + return dtype::QuantizedS4(scale); + } + } + throw ConversionError( + ssprintf("Unknown parameterized DType: %s", dtype_name.c_str()) + .c_str()); + }; + PyObject* dtype_metadata; + if (descr->metadata && PyDict_Check(descr->metadata) && + (dtype_metadata = PyDict_GetItemString(descr->metadata, "mgb_dtype"))) { + return handle_parameterized_dtype(dtype_metadata); + } + return dtype_np2mgb_raw(descr->type_num); +} + +HostTensorND lowbit_ndarray_to_host_tensor( + CompNode comp_node, TensorLayout &layout, PyArrayObject *input) { + auto src_ptr = reinterpret_cast(PyArray_DATA(input)); + if (!layout.ndim) { + // numpy scalar + mgb_assert(src_ptr, "can not convert from null numpy array"); + layout.init_contiguous_stride({1}); + } else { + mgb_assert(layout.ndim && layout.ndim <= TensorShape::MAX_NDIM, + "unsupported ndim %zu", layout.ndim); + for (size_t i = 0; i < layout.ndim; ++ i) { + layout.shape[i] = PyArray_SHAPE(input)[i]; + layout.stride[i] = PyArray_STRIDE(input, i); + mgb_assert(layout.shape[i], "zero shape not supported"); + } + mgb_assert(layout.is_contiguous()); + } + HostTensorND ret{comp_node, layout}; + lowbit_memcpy_byte2compact(layout.dtype, ret.raw_ptr(), src_ptr, + layout.total_nr_elems()); + return ret; +} + +/*! 
+/*!
+ * \brief convert a python object to tensor and try to borrow memory if the
+ *      original object is a contiguous numpy array
+ * \param dtype see np2tensor
+ * \return the megbrain tensor, and whether memory is borrowed
+ */
+std::pair<HostTensorND, bool> np2tensor_try_borrow(
+        PyObject* obj, const npy::Meth& meth, DType dtype) {
+    auto dest_cn = meth.dest_cn_;
+    mgb_assert(dest_cn.valid());
+
+    PYTHON_GIL;
+
+    PyArray_Descr* expected_descr = nullptr;
+    if (dtype.valid()) {
+        // The reference to expected_descr will be stolen later.
+        expected_descr = dtype_mgb2np_descr(dtype).release();
+    }
+
+    // make result from PyArrayObject; its reference may be stolen
+    auto make_from_arr = [&](PyArrayObject* input, bool allow_borrow) {
+
+        TensorLayout layout;
+        layout.dtype = dtype_np2mgb_descr(PyArray_DESCR(input));
+        if (dtype.valid())
+            mgb_assert(dtype == layout.dtype);
+        layout.ndim = PyArray_NDIM(input);
+
+        if (layout.dtype.is_low_bit()) {
+            auto ret = lowbit_ndarray_to_host_tensor(dest_cn, layout, input);
+            if (meth.dest_tensor_) {
+                meth.dest_tensor_->copy_from(ret);
+                ret = *meth.dest_tensor_;
+            }
+            return std::make_pair(ret, false);
+        }
+
+        auto data = reinterpret_cast<dt_byte*>(PyArray_DATA(input));
+        if (!layout.ndim) {
+            // numpy scalar
+            mgb_assert(data, "can not convert from null numpy array");
+            layout.init_contiguous_stride({1});
+        } else {
+            mgb_assert(layout.ndim && layout.ndim <= TensorShape::MAX_NDIM,
+                       "unsupported ndim %zu", layout.ndim);
+            auto dsize = layout.dtype.size();
+            bool is_empty = false;
+            for (size_t i = 0; i < layout.ndim; ++i) {
+                layout.shape[i] = PyArray_SHAPE(input)[i];
+                layout.stride[i] = PyArray_STRIDE(input, i);
+                if (!layout.shape[i]) {
+                    is_empty = true;
+                }
+                mgb_assert(layout.stride[i] % dsize == 0,
+                           "bad stride %zd", layout.stride[i]);
+                layout.stride[i] /= dsize;
+            }
+            mgb_assert(is_empty || layout.is_contiguous());
+        }
+
+        if (!meth.dest_tensor_ && allow_borrow) {
+            Py_INCREF(input);
+            PyObjRefKeeper ref_obj_cvt{reinterpret_cast<PyObject*>(input)};
+            HostTensorStorage storage;
+            auto input_ptr = ref_obj_cvt.make_shared(data);
+            storage.reset(dest_cn, layout.span().high_byte, input_ptr);
+            HostTensorND ret;
+            ret.reset(storage, layout);
+            return std::make_pair(ret, true);
+        } else {
+            auto storage = HostTensorStorage(dest_cn);
+            storage.ensure_size(layout.span().dist_byte());
+            memcpy(storage.ptr(), data, layout.span().dist_byte());
+            if (meth.dest_tensor_) {
+                meth.dest_tensor_->reset(storage, layout);
+                return std::make_pair(*meth.dest_tensor_, false);
+            } else {
+                HostTensorND ret;
+                ret.reset(storage, layout);
+                return std::make_pair(ret, false);
+            }
+        }
+    };
+
+    PyArrayObject* obj_as_arr = nullptr;
+    do {
+        // check contiguous and dtype, and borrow mem if ok
+        if (!PyArray_Check(obj))
+            break;
+        obj_as_arr = reinterpret_cast<PyArrayObject*>(obj);
+        int typenum = PyArray_DTYPE(obj_as_arr)->type_num;
+        // We have to check dtype.valid() and typenum first to avoid
+        // accidentally triggering ConversionError on incompatible dtypes which
+        // can be automatically converted into compatible ones (e.g. float64).
+        if (dtype.valid() &&
+            (expected_descr->type_num != typenum ||
+             dtype_np2mgb_descr(PyArray_DTYPE(obj_as_arr)) != dtype))
+            break;
+        if (typenum != to_mgb_supported_dtype_raw(typenum)) {
+            mgb_assert(!dtype.valid() && expected_descr == nullptr);
+            expected_descr =
+                    PyArray_DescrFromType(to_mgb_supported_dtype_raw(typenum));
+            break;
+        }
+        if (PyArray_ISCARRAY_RO(obj_as_arr)) {
+            return make_from_arr(obj_as_arr, true);
+        }
+    } while (0);
+
+    constexpr auto NP_FLAGS = NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_FORCECAST;
+    PyObject* obj_cvt;
+    if (obj_as_arr) {
+        obj_cvt = PyArray_FromArray(obj_as_arr, expected_descr, NP_FLAGS);
+    } else {
+        obj_cvt = PyArray_FromAny(obj, expected_descr, 0, 0, NP_FLAGS, nullptr);
+    }
+
+    if (obj_cvt) {
+        // convert to mgb supported dtype
+        auto arr = reinterpret_cast<PyArrayObject*>(obj_cvt);
+        int dt0 = PyArray_TYPE(arr), dt1 = to_mgb_supported_dtype_raw(dt0);
+        if (dt0 != dt1) {
+            mgb_assert(expected_descr == nullptr);
+            expected_descr = PyArray_DescrFromType(dt1);
+            mgb_assert(expected_descr);
+            auto obj_cvt_new = PyArray_FromAny(
+                    obj_cvt, expected_descr, 0, 0, NP_FLAGS, nullptr);
+            Py_DECREF(obj_cvt);
+            obj_cvt = obj_cvt_new;
+        }
+    }
+
+    if (!obj_cvt) {
+        if (PyErr_Occurred()) {
+            PyExceptionForward::throw_();
+        }
+        throw ConversionError(ssprintf("can not convert to numpy array from %s",
+                                       repr_pyobj(obj).c_str()));
+    }
+
+    auto ret = make_from_arr(reinterpret_cast<PyArrayObject*>(obj_cvt), false);
+    Py_DECREF(obj_cvt);
+    return ret;
+}
+
+//! hold a reference to HostTensorND
+class HostTensorNDRefHolder final : public NonCopyableObj {
+    HostTensorND m_val;
+    static MemPool<HostTensorNDRefHolder> sm_mem_pool;
+
+    friend class MemPool<HostTensorNDRefHolder>;
+
+    HostTensorNDRefHolder(const HostTensorND& v) : m_val{v} {}
+
+public:
+    static HostTensorNDRefHolder* alloc(const HostTensorND& v) {
+        return sm_mem_pool.alloc(v);
+    }
+
+    static void free(HostTensorNDRefHolder* p) {
+        return sm_mem_pool.free(p);
+    }
+};
+MemPool<HostTensorNDRefHolder> HostTensorNDRefHolder::sm_mem_pool;
+
+void ndarray_shared_from_tensor_py_capsule_dtor(PyObject* cap) {
+    auto ptr = PyCapsule_GetPointer(cap, "HostTensorND");
+    mgb_assert(ptr, "not a PyCapsule: %s", repr_pyobj(cap).c_str());
+    HostTensorNDRefHolder::free(static_cast<HostTensorNDRefHolder*>(ptr));
+}
+
+} // anonymous namespace
+
+PyObject* npy::ndarray_from_tensor(
+        const HostTensorND& val, ShareType share_type) {
+    if (!val.layout().is_contiguous() && !val.shape().is_empty()) {
+        mgb_assert(share_type != ShareType::MUST_SHARE);
+        HostTensorND contig;
+        contig.copy_from(val);
+        return ndarray_from_tensor(contig, ShareType::TRY_SHARE);
+    }
+    PYTHON_GIL;
+    npy_intp dims[TensorLayout::MAX_NDIM];
+    for (size_t i = 0; i < val.layout().ndim; ++i)
+        dims[i] = val.shape()[i];
+    PyObject* ret = nullptr;
+
+    auto alloc_new_ret = [&]() {
+        mgb_assert(!ret);
+        ret = PyArray_NewFromDescr(
+                &PyArray_Type, dtype_mgb2np_descr(val.dtype()).release(),
+                val.layout().ndim, dims, nullptr, nullptr, 0, nullptr);
+        mgb_assert(ret, "failed to allocate array");
+        mgb_assert(PyArray_Check(ret));
+        return PyArray_DATA(reinterpret_cast<PyArrayObject*>(ret));
+    };
+    if (val.dtype().is_low_bit()) {
+        mgb_assert(share_type != ShareType::MUST_SHARE,
+                   "can not share memory for lowbit dtype");
+        lowbit_memcpy_compact2byte(val.dtype(), alloc_new_ret(), val.raw_ptr(),
+                                   val.layout().total_nr_elems());
+    } else if (share_type == ShareType::MUST_UNSHARE) {
+        memcpy(alloc_new_ret(), val.raw_ptr(), val.layout().span().dist_byte());
+    } else {
+        // share data
+        ret = PyArray_NewFromDescr(
+                &PyArray_Type, dtype_mgb2np_descr(val.dtype()).release(),
+                val.layout().ndim, dims, nullptr,
+                const_cast<dt_byte*>(val.raw_ptr()), 0, nullptr);
+        mgb_assert(ret, "failed to alloc ndarray");
+        auto capsule = PyCapsule_New(HostTensorNDRefHolder::alloc(val),
+                "HostTensorND", ndarray_shared_from_tensor_py_capsule_dtor);
+        mgb_assert(capsule, "failed to create PyCapsule");
+        auto err = PyArray_SetBaseObject(
+                reinterpret_cast<PyArrayObject*>(ret), capsule);
+        mgb_assert(!err);
+    }
+    return ret;
+}
+
+HostTensorND npy::np2tensor(PyObject* obj, const Meth& meth, DType dtype) {
+    auto ret_full = np2tensor_try_borrow(obj, meth, dtype);
+    if (meth.must_borrow_) {
+        mgb_assert(ret_full.second,
+                   "can not borrow from numpy array as contig array with dtype "
+                   "%s; src=%s",
+                   dtype.name(), repr_pyobj(obj).c_str());
+    }
+    return ret_full.first;
+}
+
+PyObject* npy::dtype_mgb2np(mgb::DType dtype) {
+    PYTHON_GIL;
+    // According to
+    // https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.PyArray_TypeObjectFromType
+    // the following is equivalent to PyArray_TypeObjectFromType for built-in
+    // types.
+    auto descr = dtype_mgb2np_descr(dtype);
+    if (descr == nullptr) {
+        return nullptr;
+    }
+    if (dtype.has_param()) {
+        return reinterpret_cast<PyObject*>(descr.release());
+    }
+    PyObject* typeobj = reinterpret_cast<PyObject*>(descr->typeobj);
+    Py_XINCREF(typeobj);
+    return typeobj;
+}
+
+mgb::DType npy::dtype_np2mgb(PyObject* obj) {
+    mgb_assert(obj && obj != Py_None,
+               "can not convert null PyObject to numpy dtype");
+    // see
+    // http://stackoverflow.com/questions/8477122/numpy-c-api-convert-type-object-to-type-number
+    PYTHON_GIL;
+
+    PyArray_Descr* dtype;
+    if (!PyArray_DescrConverter(obj, &dtype)) {
+        throw ConversionError(ssprintf("can not convert to np.dtype from %s",
+                                       repr_pyobj(obj).c_str()));
+    }
+
+    mgb::DType result = dtype_np2mgb_descr(dtype);
+    Py_DECREF(dtype);
+    return result;
+}
+
+PyObject* npy::to_mgb_supported_dtype(PyObject* dtype) {
+    PYTHON_GIL;
+
+    PyArray_Descr* descr;
+    if (!PyArray_DescrConverter(dtype, &descr)) {
+        throw ConversionError(ssprintf("can not convert to np.dtype from %s",
+                                       repr_pyobj(dtype).c_str()));
+    }
+    mgb_assert(!descr->metadata,
+               "unexpected metadata in dtype: "
+               "dtype_obj=%s metadata=%s",
+               repr_pyobj(dtype).c_str(), repr_pyobj(descr->metadata).c_str());
+    int type_num = to_mgb_supported_dtype_raw(descr->type_num);
+    return PyArray_TypeObjectFromType(type_num);
+}
+
+TensorShape npy::vec2shape(const std::vector<size_t>& vec) {
+    TensorShape shape;
+    mgb_assert(vec.size() <= TensorShape::MAX_NDIM,
+               "dim too large: %zu (max %zu)",
+               vec.size(), TensorShape::MAX_NDIM);
+    shape.ndim = vec.size();
+    for (size_t i = 0; i < vec.size(); i++) {
+        if (!vec[i]) {
+            shape.ndim = 0;
+            break;
+        }
+        shape[i] = vec[i];
+    }
+    mgb_assert(shape.ndim, "shape should not be empty");
+    return shape;
+}
diff --git a/imperative/python/src/helper.h b/imperative/python/src/helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..f97b6fd0c3ec349ba4ef3d3d5288b632df1ea147
--- /dev/null
+++ b/imperative/python/src/helper.h
@@ -0,0 +1,320 @@
+#pragma once
+
+#include "megbrain/graph.h"
+
+#include <Python.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#if __cplusplus > 201703L
+#include <ranges>
+#endif
+#include <string>
+#include <vector>
+#include <memory>
+#include <functional>
+
+pybind11::module submodule(pybind11::module parent, const char* name, const char* doc = nullptr);
+
+pybind11::module rel_import(pybind11::str name, pybind11::module m, int level);
+
+#if __cplusplus > 201703L
+using std::ranges::range_value_t;
+#else
+template <typename T>
+using range_value_t = std::remove_cv_t<std::remove_reference_t<
+        decltype(*std::declval<T>().begin())>>;
+#endif
+
+template <typename T>
+auto to_list(const T& x) {
+    using elem_t = range_value_t<T>;
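+    // materialize the range into a std::vector so pybind11 can cast it to a
+    // Python list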
std::vector ret(x.begin(), x.end()); + return pybind11::cast(ret); +} + +template +auto to_tuple(const T& x, pybind11::return_value_policy policy = pybind11::return_value_policy::automatic) { + auto ret = pybind11::tuple(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + ret[i] = pybind11::cast(x[i], policy); + } + return ret; +} + +template +auto to_tuple(T begin, T end, pybind11::return_value_policy policy = pybind11::return_value_policy::automatic) { + auto ret = pybind11::tuple(end - begin); + for (size_t i = 0; begin < end; ++begin, ++i) { + ret[i] = pybind11::cast(*begin, policy); + } + return ret; +} + +class PyTaskDipatcher { + struct Queue : mgb::AsyncQueueSC, Queue> { + using Task = std::function; + void process_one_task(Task& f) { + if (!Py_IsInitialized()) return; + pybind11::gil_scoped_acquire _; + f(); + } + }; + Queue queue; + bool finalized = false; +public: + template + void add_task(T&& task) { + // CPython never dlclose an extension so + // finalized means the interpreter has been shutdown + if (!finalized) { + queue.add_task(std::forward(task)); + } + } + void wait_all_task_finish() { + queue.wait_all_task_finish(); + } + ~PyTaskDipatcher() { + finalized = true; + queue.wait_all_task_finish(); + } +}; + +extern PyTaskDipatcher py_task_q; + +class GILManager { + PyGILState_STATE gstate; + + public: + GILManager(): + gstate(PyGILState_Ensure()) + { + } + + ~GILManager() { + PyGILState_Release(gstate); + } +}; +#define PYTHON_GIL GILManager __gil_manager + +//! wraps a shared_ptr and decr PyObject ref when destructed +class PyObjRefKeeper { + std::shared_ptr m_ptr; + +public: + static void deleter(PyObject* p) { + if (p) { + py_task_q.add_task([p](){Py_DECREF(p);}); + } + } + + PyObjRefKeeper() = default; + PyObjRefKeeper(PyObject* p) : m_ptr{p, deleter} {} + + PyObject* get() const { return m_ptr.get(); } + + //! create a shared_ptr as an alias of the underlying ptr + template + std::shared_ptr make_shared(T* ptr) const { + return {m_ptr, ptr}; + } +}; + +//! exception to be thrown when python callback fails +class PyExceptionForward : public std::exception { + PyObject *m_type, *m_value, *m_traceback; + std::string m_msg; + + PyExceptionForward(PyObject* type, PyObject* value, PyObject* traceback, + const std::string& msg) + : m_type{type}, + m_value{value}, + m_traceback{traceback}, + m_msg{msg} {} + +public: + PyExceptionForward(const PyExceptionForward&) = delete; + PyExceptionForward& operator=(const PyExceptionForward&) = delete; + ~PyExceptionForward(); + + PyExceptionForward(PyExceptionForward&& rhs) + : m_type{rhs.m_type}, + m_value{rhs.m_value}, + m_traceback{rhs.m_traceback}, + m_msg{std::move(rhs.m_msg)} { + rhs.m_type = rhs.m_value = rhs.m_traceback = nullptr; + } + + //! throw PyExceptionForward from current python error state + static void throw_() __attribute__((noreturn)); + + //! restore python error + void restore(); + + const char* what() const noexcept override { return m_msg.c_str(); } +}; + +//! numpy utils +namespace npy { + //! convert tensor shape to raw vector + static inline std::vector shape2vec(const mgb::TensorShape &shape) { + return {shape.shape, shape.shape + shape.ndim}; + } + + //! change numpy dtype to megbrain supported dtype + PyObject* to_mgb_supported_dtype(PyObject *dtype); + + //! convert raw vector to tensor shape + mgb::TensorShape vec2shape(const std::vector &vec); + + //! convert megbrain dtype to numpy dtype object; return new reference + PyObject* dtype_mgb2np(mgb::DType dtype); + + //! 
convert numpy dtype object or string to megbrain dtype + mgb::DType dtype_np2mgb(PyObject *obj); + + //! buffer sharing type + enum class ShareType { + MUST_SHARE, //!< must be shared + MUST_UNSHARE, //!< must not be shared + TRY_SHARE //!< share if possible + }; + + //! get ndarray from HostTensorND + PyObject* ndarray_from_tensor(const mgb::HostTensorND &val, + ShareType share_type); + + //! specify how to convert numpy array to tensor + struct Meth { + bool must_borrow_ = false; + mgb::HostTensorND *dest_tensor_ = nullptr; + mgb::CompNode dest_cn_; + + //! make a Meth that allows borrowing numpy array memory + static Meth borrow( + mgb::CompNode dest_cn = mgb::CompNode::default_cpu()) { + return {false, nullptr, dest_cn}; + } + + //! make a Meth that requires the numpy array to be borrowed + static Meth must_borrow( + mgb::CompNode dest_cn = mgb::CompNode::default_cpu()) { + return {true, nullptr, dest_cn}; + } + + //! make a Meth that requires copying the value into another + //! tensor + static Meth copy_into(mgb::HostTensorND *tensor) { + return {false, tensor, tensor->comp_node()}; + } + }; + /*! + * \brief convert an object to megbrain tensor + * \param meth specifies how the conversion should take place + * \param dtype desired dtype; it can be set as invalid to allow arbitrary + * dtype + */ + mgb::HostTensorND np2tensor(PyObject *obj, const Meth &meth, + mgb::DType dtype); +} + +// Note: following macro was copied from pybind11/detail/common.h +// Robust support for some features and loading modules compiled against different pybind versions +// requires forcing hidden visibility on pybind code, so we enforce this by setting the attribute on +// the main `pybind11` namespace. +#if !defined(PYBIND11_NAMESPACE) +# ifdef __GNUG__ +# define PYBIND11_NAMESPACE pybind11 __attribute__((visibility("hidden"))) +# else +# define PYBIND11_NAMESPACE pybind11 +# endif +#endif + +namespace PYBIND11_NAMESPACE { +namespace detail { + + template struct type_caster> + : list_caster, T> {}; + + template <> struct type_caster { + PYBIND11_TYPE_CASTER(mgb::DType, _("DType")); + public: + bool load(handle src, bool convert) { + auto obj = reinterpret_borrow(src); + if (!convert && !isinstance(obj)) { + return false; + } + if (obj.is_none()) { + return true; + } + try { + obj = pybind11::dtype::from_args(obj); + } catch (pybind11::error_already_set&) { + return false; + } + try { + value = npy::dtype_np2mgb(obj.ptr()); + } catch (...) 
{ + return false; + } + return true; + } + + static handle cast(mgb::DType dt, return_value_policy /* policy */, handle /* parent */) { + // ignore policy and parent because we always return a pure python object + return npy::dtype_mgb2np(std::move(dt)); + } + }; + + template <> struct type_caster { + PYBIND11_TYPE_CASTER(mgb::TensorShape, _("TensorShape")); + public: + bool load(handle src, bool convert) { + auto obj = reinterpret_steal(src); + if (!isinstance(obj)) { + return false; + } + value.ndim = len(obj); + mgb_assert(value.ndim <= mgb::TensorShape::MAX_NDIM); + size_t i = 0; + for (auto v : obj) { + mgb_assert(i < value.ndim); + value.shape[i] = reinterpret_borrow(v).cast(); + ++i; + } + return true; + } + + static handle cast(mgb::TensorShape shape, return_value_policy /* policy */, handle /* parent */) { + // ignore policy and parent because we always return a pure python object + return to_tuple(shape.shape, shape.shape + shape.ndim).release(); + } + }; + + // hack to make custom object implicitly convertible from None + template struct from_none_caster : public type_caster_base { + using base = type_caster_base; + bool load(handle src, bool convert) { + if (!convert || !src.is_none()) { + return base::load(src, convert); + } + // adapted from pybind11::implicitly_convertible + auto temp = reinterpret_steal(PyObject_Call( + (PyObject*) this->typeinfo->type, tuple().ptr(), nullptr)); + if (!temp) { + PyErr_Clear(); + return false; + } + // adapted from pybind11::detail::type_caster_generic + if (base::load(temp, false)) { + loader_life_support::add_patient(temp); + return true; + } + return false; + } + }; + + template<> struct type_caster : public from_none_caster {}; + +} // detail +} // PYBIND11_NAMESPACE + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/python/src/imperative_rt.cpp b/imperative/python/src/imperative_rt.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6e33832a69ce41726bfef63de760b84b7dd9390a --- /dev/null +++ b/imperative/python/src/imperative_rt.cpp @@ -0,0 +1,94 @@ +#include "./imperative_rt.h" + +#include +#include +#include +#include +#include + +#include "megbrain/imperative.h" +#include "megbrain/imperative/interpreter.h" +#include "megbrain/imperative/ops/opr_attr.h" +#include "./helper.h" + +namespace py = pybind11; + +using namespace mgb; +using namespace imperative; +using namespace interpreter; + + +namespace { + +std::optional, std::vector, std::vector>> +make_backward_graph( + const OpDef& opdef, std::vector inputs, + std::vector input_requires_grad, + std::vector output_has_grad) { + auto res = OpDef::make_backward_graph(opdef, + SmallVector(inputs.begin(), inputs.end()), + SmallVector(input_requires_grad.begin(), input_requires_grad.end()), + SmallVector(output_has_grad.begin(), output_has_grad.end())); + if (res.backward) { + return std::optional, std::vector, std::vector>>{ + std::in_place, res.backward, res.save_for_backward, res.input_has_grad}; + } else { + return {}; + } +} +} // namespace + +void init_imperative_rt(py::module m) { + py::class_(m, "Interpreter") + .def("put", [](Interpreter::Channel& self, py::array data, DType dtype, CompNode cn) { + if (!cn.valid()) { + cn = CompNode::load("xpux"); + } + constexpr int size_threshhold = TensorShape::MAX_NDIM; + if (data.size() > size_threshhold) { + return self.put(npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype)); + } else { + HostTensorND ret(cn); + return self.put(npy::np2tensor(data.ptr(), 
npy::Meth::copy_into(&ret), dtype)); + } + }, py::arg(), py::arg("dtype") = py::none(), py::arg("device") = py::none()) + .def("delete", [](Interpreter::Channel& self, Interpreter::Handle handle) { + return self.del(handle); + }) + .def("get_value", [](Interpreter::Channel& self, Interpreter::Handle handle) { + PyObject* optr = npy::ndarray_from_tensor(self.get_value(handle), npy::ShareType::TRY_SHARE); + return py::reinterpret_steal(optr); + }) + .def("get_dtype", &Interpreter::Channel::get_dtype) + .def("get_device", &Interpreter::Channel::get_device) + .def("get_shape", &Interpreter::Channel::get_shape) + .def("_get_dev_tensor", &Interpreter::Channel::get_dev_tensor) + .def("apply_op", &Interpreter::Channel::apply_op) + .def("sync", &Interpreter::Channel::sync); + + std::unique_ptr ch = Interpreter::inst().create_channel(); + m.attr("interpreter") = py::detail::make_caster::cast( + std::move(ch), py::return_value_policy::move, {}); + for (auto name : {"put", "delete", "get_value", "get_dtype", "get_device", "get_shape", "_get_dev_tensor", "apply_op"}) { + m.attr(name) = m.attr("interpreter").attr(name); + } + + m.def("sync", [m]() { + m.attr("interpreter").attr("sync")(); + py_task_q.wait_all_task_finish(); + }); + + m.def("make_backward_graph", &make_backward_graph); + + py::class_>(m, "OpDef") + .def("ctype", [](const OpDef& opdef) { + if (auto attr = opdef.try_cast_final()) { + return attr->type.c_str(); + } + return opdef.dyn_typeinfo()->name; + }) + .def("__eq__", [](const OpDef& lhs, const OpDef& rhs) { + return lhs.is_same(rhs); + }) + .def("__hash__", &OpDef::hash); +} diff --git a/imperative/python/src/imperative_rt.h b/imperative/python/src/imperative_rt.h new file mode 100644 index 0000000000000000000000000000000000000000..2194bdb4ebbeb4a4a746b8a6eb15fdd70847dbd4 --- /dev/null +++ b/imperative/python/src/imperative_rt.h @@ -0,0 +1,7 @@ +#pragma once + +#include "./helper.h" + +#include "megbrain/imperative.h" + +void init_imperative_rt(pybind11::module m); diff --git a/imperative/python/src/module.cpp b/imperative/python/src/module.cpp new file mode 100644 index 0000000000000000000000000000000000000000..309a0220899c63ffc36486e024983ce211cb69bb --- /dev/null +++ b/imperative/python/src/module.cpp @@ -0,0 +1,55 @@ +#include + +#define DO_IMPORT_ARRAY +#include "./numpy_dtypes.h" +#include "./helper.h" + +#include "./common.h" +#include "./utils.h" +#include "./imperative_rt.h" +#include "./graph_rt.h" +#include "./ops.h" + +namespace py = pybind11; + +#ifndef MODULE_NAME +#define MODULE_NAME imperative_rt +#endif + +PYBIND11_MODULE(MODULE_NAME, m) { + // initialize numpy + if ([]() {import_array1(1); return 0;}()) { + throw py::error_already_set(); + } + + py::module::import("sys").attr("modules")[m.attr("__name__")] = m; + + m.attr("__package__") = m.attr("__name__"); + m.attr("__builtins__") = py::module::import("builtins"); + + auto atexit = py::module::import("atexit"); + atexit.attr("register")(py::cpp_function([]() { + py::gil_scoped_release _; + py_task_q.wait_all_task_finish(); + })); + + auto common = submodule(m, "common"); + auto utils = submodule(m, "utils"); + auto imperative = submodule(m, "imperative"); + auto graph = submodule(m, "graph"); + auto ops = submodule(m, "ops"); + + init_common(common); + init_utils(utils); + init_imperative_rt(imperative); + init_graph_rt(graph); + init_ops(ops); + + py::exec(R"( + from .common import * + from .utils import * + from .imperative import * + from .graph import * + )", + py::getattr(m, "__dict__")); +} diff --git 
a/imperative/python/src/numpy_dtypes.h b/imperative/python/src/numpy_dtypes.h new file mode 100644 index 0000000000000000000000000000000000000000..d13780d582955a75c1d340fc56315be06ef400fe --- /dev/null +++ b/imperative/python/src/numpy_dtypes.h @@ -0,0 +1,45 @@ +/** + * \file imperative/python/src/numpy_dtypes.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \brief import numpy array with proper settings + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ +#pragma once + +#ifndef DO_IMPORT_ARRAY +#define NO_IMPORT_ARRAY +#endif +#define PY_ARRAY_UNIQUE_SYMBOL mgb_numpy_array_api +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#include + +#define FOREACH_MGB_LOW_BIT(cb) \ + cb(1) \ + cb(2) \ + cb(4) \ + +#define FOREACH_MGB_DTYPE_PAIR(cb) \ + cb(IntB1, npy_num_intb1()) \ + cb(IntB2, npy_num_intb2()) \ + cb(IntB4, npy_num_intb4()) \ + cb(BFloat16, npy_num_bfloat16()) \ + +namespace mgb { + //! numpy type num for intb1/2/4 type +#define DEFINE_NPY_INTBX(n) \ + int npy_num_intb##n(); +FOREACH_MGB_LOW_BIT(DEFINE_NPY_INTBX) +#undef DEFINE_NPY_INTBX + void init_npy_num_intbx(pybind11::module m); + + //! numpy type num for bfloat16 type + int npy_num_bfloat16(); + void init_npy_num_bfloat16(pybind11::module m); +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/python/src/numpy_dtypes_bfloat16.cpp b/imperative/python/src/numpy_dtypes_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..52d122aa84faed567e60a45620bd21966b33cc7d --- /dev/null +++ b/imperative/python/src/numpy_dtypes_bfloat16.cpp @@ -0,0 +1,275 @@ +/** + * \file imperative/python/src/numpy_dtypes_bfloat16.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \brief numpy dtypes for bfloat16 + * + * \copyright Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+ * + */ +#include "./numpy_dtypes.h" + +#include +#include +#include + +#include "megbrain/common.h" +#include "megbrain/dtype.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +namespace { + +struct BFloat16Type { + static int npy_typenum; + mgb::dt_bfloat16 value; + + struct PyObj; + struct NpyType; + + template + struct NpyCast; +}; + +int BFloat16Type::npy_typenum; + +/* ==================== BFloat16Type::NpyCast ==================== */ + +template +struct BFloat16Type::NpyCast { + static void apply(void* from_, void* to_, npy_intp n, void* /*fromarr*/, + void* /*toarr*/) { + auto from = static_cast(from_); + auto to = static_cast(to_); + for (npy_intp i = 0; i < n; ++i) { + float cur = static_cast(from[i]); + to[i].value = cur; + } + } +}; + +template +struct BFloat16Type::NpyCast { + static void apply(void* from_, void* to_, npy_intp n, void* /*fromarr*/, + void* /*toarr*/) { + auto from = static_cast(from_); + auto to = static_cast(to_); + for (npy_intp i = 0; i < n; ++i) { + to[i] = from[i].value; + } + } +}; + +/* ==================== BFloat16Type::PyObj ==================== */ +struct BFloat16Type::PyObj { + PyObject_HEAD BFloat16Type obj; + + static PyTypeObject py_type; + + static PyObject* from_bfloat16(BFloat16Type val) { + auto p = reinterpret_cast(py_type.tp_alloc(&py_type, 0)); + p->obj.value = val.value; + return reinterpret_cast(p); + } + + static PyObject* py_new(PyTypeObject* type, PyObject* args, PyObject* kwds); + static PyObject* py_repr(PyObject* obj); + static PyObject* py_richcompare(PyObject* a, PyObject* b, int op); +}; +PyTypeObject BFloat16Type::PyObj::py_type; + +PyObject* BFloat16Type::PyObj::py_new(PyTypeObject* type, PyObject* args, + PyObject* kwds) { + PyObj* self; + Py_ssize_t size; + + self = (PyObj*)type->tp_alloc(type, 0); + + size = PyTuple_GET_SIZE(args); + if (size > 1) { + PyErr_SetString(PyExc_TypeError, "BFloat16Type Only has 1 parameter"); + return NULL; + } + PyObject* x = PyTuple_GET_ITEM(args, 0); + if (PyObject_IsInstance(x, (PyObject*)&py_type)) { + Py_INCREF(x); + return x; + } + + if (!PyFloat_Check(x)) { + PyErr_SetString(PyExc_TypeError, + "BFloat16Type must be initialized wit float"); + return NULL; + } + + const float s = PyFloat_AsDouble(x); + + self->obj.value = s; + + return (PyObject*)self; +} + +PyObject* BFloat16Type::PyObj::py_repr(PyObject* obj) { + float fval = static_cast(((PyObj*)obj)->obj.value); + return PyUnicode_FromString(mgb::ssprintf("%f", fval).c_str()); +} + +PyObject* BFloat16Type::PyObj::py_richcompare(PyObject* a, PyObject* b, + int op) { + mgb_assert(PyObject_IsInstance(a, (PyObject*)&py_type)); + auto bval = PyFloat_AsDouble(b); + if (bval == -1 && PyErr_Occurred()) { + return NULL; + } + double aval = ((PyObj*)a)->obj.value; +#define OP(py, op) \ + case py: { \ + if (aval op bval) { \ + Py_RETURN_TRUE; \ + } else { \ + Py_RETURN_FALSE; \ + } \ + } + switch (op) { + OP(Py_LT, <) + OP(Py_LE, <=) + OP(Py_EQ, ==) + OP(Py_NE, !=) + OP(Py_GT, >) + OP(Py_GE, >=) + }; +#undef OP + return Py_NotImplemented; +} + +/* ==================== BFloat16Type::NpyType ==================== */ +struct BFloat16Type::NpyType { + static PyArray_ArrFuncs funcs; + static PyArray_Descr descr; + + static bool init(); + + static void copyswap(void* dst, void* src, int swap, void* /*arr*/) { + if (src) { + mgb_assert(!swap); + memcpy(dst, src, sizeof(BFloat16Type)); + } + } + static PyObject* getitem(void* data, void* ap) { + return BFloat16Type::PyObj::from_bfloat16( + *static_cast(data)); + } + static int 
setitem(PyObject* op, void* ov, void* ap); +}; + +PyArray_ArrFuncs BFloat16Type::NpyType::funcs; +PyArray_Descr BFloat16Type::NpyType::descr; + +int BFloat16Type::NpyType::setitem(PyObject* op, void* ov, void* ap) { + if (PyLong_Check(op)) { + int a = PyLong_AsLong(op); + static_cast(ov)->value = a; + } else if (PyFloat_Check(op)) { + float a = PyFloat_AsDouble(op); + static_cast(ov)->value = a; + } else if (PyObject_IsInstance( + op, (PyObject*)(&(BFloat16Type::PyObj::py_type)))) { + static_cast(ov)->value = ((PyObj*)op)->obj.value; + } else { + PyErr_SetString(PyExc_ValueError, + "input type must be int/float/bfloat16"); + return -1; + } + return 0; +} + +bool BFloat16Type::NpyType::init() { + descr = {PyObject_HEAD_INIT(0) & BFloat16Type::PyObj::py_type, + 'V', // kind + 'f', // type + '=', // byteorder + NPY_NEEDS_PYAPI | NPY_USE_GETITEM | NPY_USE_SETITEM, + 1, // type num + sizeof(BFloat16Type), + alignof(BFloat16Type), + NULL, + NULL, + NULL, + &funcs}; + Py_TYPE(&descr) = &PyArrayDescr_Type; + PyArray_InitArrFuncs(&funcs); + funcs.copyswap = copyswap; + funcs.getitem = getitem; + funcs.setitem = setitem; + npy_typenum = PyArray_RegisterDataType(&descr); + +#define REGISTER_CAST(From, To, From_descr, To_typenum, safe) \ + { \ + PyArray_Descr* from_descr = (From_descr); \ + if (PyArray_RegisterCastFunc(from_descr, (To_typenum), \ + NpyCast::apply) < 0) { \ + return false; \ + } \ + if (safe && PyArray_RegisterCanCast(from_descr, (To_typenum), \ + NPY_NOSCALAR) < 0) { \ + return false; \ + } \ + } +#define REGISTER_INT_CASTS(bits) \ + REGISTER_CAST(npy_int##bits, BFloat16Type, \ + PyArray_DescrFromType(NPY_INT##bits), \ + BFloat16Type::npy_typenum, 1) \ + REGISTER_CAST(BFloat16Type, npy_int##bits, &descr, NPY_INT##bits, 0) \ + REGISTER_CAST(npy_uint##bits, BFloat16Type, \ + PyArray_DescrFromType(NPY_UINT##bits), \ + BFloat16Type::npy_typenum, 1) \ + REGISTER_CAST(BFloat16Type, npy_uint##bits, &descr, NPY_UINT##bits, 0) + + REGISTER_INT_CASTS(8) + REGISTER_INT_CASTS(16) + REGISTER_INT_CASTS(32) + REGISTER_INT_CASTS(64) + REGISTER_CAST(BFloat16Type, float, &descr, NPY_FLOAT, 0) + REGISTER_CAST(float, BFloat16Type, PyArray_DescrFromType(NPY_FLOAT), + BFloat16Type::npy_typenum, 0) + REGISTER_CAST(BFloat16Type, double, &descr, NPY_DOUBLE, 1) + REGISTER_CAST(double, BFloat16Type, PyArray_DescrFromType(NPY_DOUBLE), + BFloat16Type::npy_typenum, 0) + return true; +} + +} // anonymous namespace + +// define a new python type: pybfloat16 +bool init_pytype_bfloat16() { + auto& py_type = BFloat16Type::PyObj::py_type; + py_type = {PyVarObject_HEAD_INIT(NULL, 0)}; + py_type.tp_name = "megengine.core._imperative_rt.pybfloat16"; + py_type.tp_basicsize = sizeof(BFloat16Type::PyObj); + py_type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; + py_type.tp_doc = "bfloat16 type"; + py_type.tp_new = BFloat16Type::PyObj::py_new; + py_type.tp_str = BFloat16Type::PyObj::py_repr; + py_type.tp_repr = BFloat16Type::PyObj::py_repr; + py_type.tp_richcompare = BFloat16Type::PyObj::py_richcompare; + py_type.tp_base = &PyGenericArrType_Type; + return PyType_Ready(&py_type) >= 0; +} + +int mgb::npy_num_bfloat16() { + return BFloat16Type::npy_typenum; +} + +namespace py = pybind11; + +void mgb::init_npy_num_bfloat16(py::module m) { + mgb_assert(init_pytype_bfloat16()); + mgb_assert(BFloat16Type::NpyType::init()); + m.add_object("pybfloat16", reinterpret_cast( + &BFloat16Type::PyObj::py_type)); + m.add_object("bfloat16", reinterpret_cast( + PyArray_DescrFromType(npy_num_bfloat16()))); +} + +// vim: syntax=cpp.doxygen 
foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/python/src/numpy_dtypes_intbx.cpp b/imperative/python/src/numpy_dtypes_intbx.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d6d231cf01b78586739aff47d459bf3fd619e211 --- /dev/null +++ b/imperative/python/src/numpy_dtypes_intbx.cpp @@ -0,0 +1,333 @@ +/** + * \file imperative/python/src/numpy_dtypes_intbx.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \brief numpy dtypes for low bit + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ +#include "./numpy_dtypes.h" + +#include +#include + +#include "megbrain/common.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +namespace { + +template +struct LowBitType { + static_assert(N < 8, "low bit only supports less than 8 bits"); + static int npy_typenum; + //! numerical value (-3, -1, 1, 3) + int8_t value; + + struct PyObj; + struct NpyType; + + const static int32_t max_value = (1 << N) - 1; + + //! check whether val is (-3, -1, 1, 3) and set python error + static bool check_value_set_err(int val) { + int t = val + max_value; + if ((t & 1) || t < 0 || t > (max_value << 1)) { + PyErr_SetString(PyExc_ValueError, + mgb::ssprintf("low bit dtype number error: " + "value=%d; allowed {-3, -1, 1, 3}", + val) + .c_str()); + return false; + } + + return true; + } + + template + struct NpyCast; +}; + +template +int LowBitType::npy_typenum; + +/* ==================== LowBitType::NpyCast ==================== */ + +template +template +struct LowBitType::NpyCast> { + static void apply(void* from_, void* to_, npy_intp n, void* /*fromarr*/, + void* /*toarr*/) { + auto from = static_cast(from_); + auto to = static_cast*>(to_); + for (npy_intp i = 0; i < n; ++i) { + int cur = static_cast(from[i]); + if (!LowBitType::check_value_set_err(cur)) + return; + to[i].value = cur; + } + } +}; + +template +template +struct LowBitType::NpyCast, T> { + static void apply(void* from_, void* to_, npy_intp n, void* /*fromarr*/, + void* /*toarr*/) { + auto from = static_cast*>(from_); + auto to = static_cast(to_); + for (npy_intp i = 0; i < n; ++i) { + to[i] = from[i].value; + } + } +}; + +/* ==================== LowBitType::PyObj ==================== */ +template +struct LowBitType::PyObj { + PyObject_HEAD LowBitType obj; + + static PyTypeObject py_type; + + static PyObject* from_lowbit(LowBitType val) { + auto p = reinterpret_cast(py_type.tp_alloc(&py_type, 0)); + p->obj.value = val.value; + return reinterpret_cast(p); + } + + static PyObject* py_new(PyTypeObject* type, PyObject* args, PyObject* kwds); + static PyObject* py_repr(PyObject* obj); + static PyObject* py_richcompare(PyObject* a, PyObject* b, int op); +}; +template +PyTypeObject LowBitType::PyObj::py_type; + +template +PyObject* LowBitType::PyObj::py_new(PyTypeObject* type, PyObject* args, + PyObject* kwds) { + PyObj* self; + Py_ssize_t size; + + self = (PyObj*)type->tp_alloc(type, 0); + + size = PyTuple_GET_SIZE(args); + if (size > 1) { + PyErr_SetString(PyExc_TypeError, "LowBitType Only has 1 parameter"); + return NULL; + } + PyObject* x = PyTuple_GET_ITEM(args, 0); + if (PyObject_IsInstance(x, (PyObject*)&py_type)) { + Py_INCREF(x); + return x; + } + + if (!PyLong_Check(x)) { + PyErr_SetString(PyExc_TypeError, + "LowBitType must be initialized wit int"); + return NULL; + } + + const long s = PyLong_AsLong(x); + + self->obj.value = s; + + return (PyObject*)self; +} + +template +PyObject* LowBitType::PyObj::py_repr(PyObject* obj) { 
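+    // repr as the plain numerical value, e.g. "-3"/"3" for a 2-bit element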
+ return PyUnicode_FromFormat("%d", ((PyObj*)obj)->obj.value); +} + +template +PyObject* LowBitType::PyObj::py_richcompare(PyObject* a, PyObject* b, + int op) { + mgb_assert(PyObject_IsInstance(a, (PyObject*)&py_type)); + auto bval = PyFloat_AsDouble(b); + if (bval == -1 && PyErr_Occurred()) { + return NULL; + } + double aval = ((PyObj*)a)->obj.value; +#define OP(py, op) \ + case py: { \ + if (aval op bval) { \ + Py_RETURN_TRUE; \ + } else { \ + Py_RETURN_FALSE; \ + } \ + } + switch (op) { + OP(Py_LT, <) + OP(Py_LE, <=) + OP(Py_EQ, ==) + OP(Py_NE, !=) + OP(Py_GT, >) + OP(Py_GE, >=) + }; +#undef OP + return Py_NotImplemented; +} + +/* ==================== LowBitType::NpyType ==================== */ +template +struct LowBitType::NpyType { + static PyArray_ArrFuncs funcs; + static PyArray_Descr descr; + + static bool init(); + + static void copyswap(void* dst, void* src, int swap, void* /*arr*/) { + if (src) { + mgb_assert(!swap); + memcpy(dst, src, sizeof(LowBitType)); + } + } + static PyObject* getitem(void* data, void* ap) { + return LowBitType::PyObj::from_lowbit( + *static_cast*>(data)); + } + static int setitem(PyObject* op, void* ov, void* ap); + static int fill(void* data_, npy_intp length, void* arr); +}; + +template +PyArray_ArrFuncs LowBitType::NpyType::funcs; +template +PyArray_Descr LowBitType::NpyType::descr; + +template +int LowBitType::NpyType::setitem(PyObject* op, void* ov, void* ap) { + if (!PyLong_Check(op)) { + PyErr_SetString(PyExc_ValueError, "input type must be int"); + return -1; + } + + int a = PyLong_AsLong(op); + if (!check_value_set_err(a)) + return -1; + + static_cast*>(ov)->value = a; + return 0; +} + +template +int LowBitType::NpyType::fill(void* data_, npy_intp length, void* arr) { + auto data = static_cast*>(data_); + int8_t delta = data[1].value - data[0].value, r = data[1].value; + if (!check_value_set_err(data[0].value) || + !check_value_set_err(data[1].value)) + return -1; + for (npy_intp i = 2; i < length; i++) { + r += delta; + if (r > max_value) + r = -max_value; + else if (r < -max_value) + r = max_value; + data[i].value = r; + } + return 0; +} + +template +bool LowBitType::NpyType::init() { + descr = {PyObject_HEAD_INIT(0) & LowBitType::PyObj::py_type, + 'V', // kind + 'r', // type + '=', // byteorder + NPY_NEEDS_PYAPI | NPY_USE_GETITEM | NPY_USE_SETITEM, + 0, // type num + sizeof(LowBitType), + alignof(LowBitType), + NULL, + NULL, + NULL, + &funcs}; + Py_TYPE(&descr) = &PyArrayDescr_Type; + PyArray_InitArrFuncs(&funcs); + funcs.copyswap = copyswap; + funcs.getitem = getitem; + funcs.setitem = setitem; + funcs.fill = fill; + npy_typenum = PyArray_RegisterDataType(&descr); + +#define REGISTER_CAST(From, To, From_descr, To_typenum, safe) \ + { \ + PyArray_Descr* from_descr = (From_descr); \ + if (PyArray_RegisterCastFunc(from_descr, (To_typenum), \ + NpyCast::apply) < 0) { \ + return false; \ + } \ + if (safe && PyArray_RegisterCanCast(from_descr, (To_typenum), \ + NPY_NOSCALAR) < 0) { \ + return false; \ + } \ + } +#define REGISTER_INT_CASTS(bits) \ + REGISTER_CAST(npy_int##bits, LowBitType, \ + PyArray_DescrFromType(NPY_INT##bits), \ + LowBitType::npy_typenum, 1) \ + REGISTER_CAST(LowBitType, npy_int##bits, &descr, NPY_INT##bits, 0) \ + REGISTER_CAST(npy_uint##bits, LowBitType, \ + PyArray_DescrFromType(NPY_UINT##bits), \ + LowBitType::npy_typenum, 1) \ + REGISTER_CAST(LowBitType, npy_uint##bits, &descr, NPY_UINT##bits, 0) + + REGISTER_INT_CASTS(8) + REGISTER_INT_CASTS(16) + REGISTER_INT_CASTS(32) + REGISTER_INT_CASTS(64) + REGISTER_CAST(LowBitType, 
float, &descr, NPY_FLOAT, 0) + REGISTER_CAST(float, LowBitType, PyArray_DescrFromType(NPY_FLOAT), + LowBitType::npy_typenum, 0) + REGISTER_CAST(LowBitType, double, &descr, NPY_DOUBLE, 1) + REGISTER_CAST(double, LowBitType, PyArray_DescrFromType(NPY_DOUBLE), + LowBitType::npy_typenum, 0) + return true; +} + +} // anonymous namespace + +#define DEFINE_INTBX(n) using IntB##n = LowBitType; +FOREACH_MGB_LOW_BIT(DEFINE_INTBX) +#undef DEFINE_INTBX + +// define a new python type: pyintb1/2/4 +#define DEFINE_INIT_PYTYPE(n) \ + bool init_pytype_intb##n() { \ + auto& py_type = IntB##n::PyObj::py_type; \ + py_type = {PyVarObject_HEAD_INIT(NULL, 0)}; \ + py_type.tp_name = "megengine.core._imperative_rt.pyintb" #n; \ + py_type.tp_basicsize = sizeof(IntB##n::PyObj); \ + py_type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \ + py_type.tp_doc = "an low bit int type"; \ + py_type.tp_new = IntB##n::PyObj::py_new; \ + py_type.tp_str = IntB##n::PyObj::py_repr; \ + py_type.tp_repr = IntB##n::PyObj::py_repr; \ + py_type.tp_richcompare = IntB##n::PyObj::py_richcompare; \ + py_type.tp_base = &PyGenericArrType_Type; \ + return PyType_Ready(&py_type) >= 0; \ + } +FOREACH_MGB_LOW_BIT(DEFINE_INIT_PYTYPE) +#undef DEFINE_INIT_PYTYPE + +#define DEFINE_NPY_INTBX(n) \ + int mgb::npy_num_intb##n() { return IntB##n::npy_typenum; } +FOREACH_MGB_LOW_BIT(DEFINE_NPY_INTBX) +#undef DEFINE_NPY_INTBX + +namespace py = pybind11; + +void mgb::init_npy_num_intbx(py::module m) { +#define ADD_OBJ_INTBX(n) \ + mgb_assert(init_pytype_intb##n()); \ + mgb_assert(IntB##n::NpyType::init()); \ + m.add_object("pyintb" #n, reinterpret_cast( \ + &IntB##n::PyObj::py_type)); \ + m.add_object("intb" #n, reinterpret_cast( \ + PyArray_DescrFromType(npy_num_intb##n()))); + FOREACH_MGB_LOW_BIT(ADD_OBJ_INTBX) +} + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/python/src/ops.cpp b/imperative/python/src/ops.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2e2442538d90b49ba4c4980acece2761b28b5be9 --- /dev/null +++ b/imperative/python/src/ops.cpp @@ -0,0 +1,83 @@ +#include "./ops.h" + +#include "megbrain/imperative.h" +#include "megbrain/imperative/ops/backward_graph.h" +#include "megbrain/imperative/ops/opr_attr.h" +#include "megbrain/imperative/ops/tensor_manip.h" +#include "megbrain/imperative/ops/collective_comm.h" +#include "megbrain/imperative/ops/io_remote.h" +#include "megbrain/imperative/ops/cond_take.h" +#include "megbrain/imperative/ops/nms.h" + +namespace py = pybind11; + +void init_ops(py::module m) { + #include "opdef.inl" + + py::class_, OpDef>(m, "OprAttr") + .def(py::init<>()) + .def_readwrite("type", &OprAttr::type) + .def_readwrite("param", &OprAttr::param) + .def_readwrite("config", &OprAttr::config) + .def_property("param", + [](const OprAttr& attr) -> py::bytes { + return std::string(attr.param.begin(), attr.param.end()); + }, + [] (OprAttr& attr, py::bytes data) { + auto s = py::cast(data); + attr.param.clear(); + attr.param.insert(attr.param.end(), s.begin(), s.end()); + }); + + py::class_, OpDef>(m, "GetVarShape") + .def(py::init()); + + py::class_, OpDef>(m, "CollectiveComm") + .def(py::init<>()) + .def_readwrite("key", &CollectiveComm::key) + .def_readwrite("nr_devices", &CollectiveComm::nr_devices) + .def_readwrite("rank", &CollectiveComm::rank) + .def_readwrite("is_root", &CollectiveComm::is_root) + .def_readwrite("local_grad", &CollectiveComm::local_grad) + .def_readwrite("addr", &CollectiveComm::addr) + .def_readwrite("port", &CollectiveComm::port) + 
.def_readwrite("mode", &CollectiveComm::mode) + .def_readwrite("dtype", &CollectiveComm::dtype) + .def_readwrite("backend", &CollectiveComm::backend) + .def_readwrite("comp_node", &CollectiveComm::comp_node); + + py::class_, OpDef>(m, "RemoteSend") + .def(py::init<>()) + .def_readwrite("key", &RemoteSend::key) + .def_readwrite("addr", &RemoteSend::addr) + .def_readwrite("port", &RemoteSend::port) + .def_readwrite("rank_to", &RemoteSend::rank_to); + + py::class_, OpDef>(m, "RemoteRecv") + .def(py::init<>()) + .def_readwrite("key", &RemoteRecv::key) + .def_readwrite("addr", &RemoteRecv::addr) + .def_readwrite("port", &RemoteRecv::port) + .def_readwrite("rank_from", &RemoteRecv::rank_from) + .def_readwrite("shape", &RemoteRecv::shape) + .def_readwrite("cn", &RemoteRecv::cn) + .def_readwrite("dtype", &RemoteRecv::dtype); + + py::class_, OpDef>(m, "ParamPackSplit") + .def(py::init<>()) + .def_readwrite("offsets", &ParamPackSplit::offsets) + .def_readwrite("shapes", &ParamPackSplit::shapes); + + py::class_, OpDef>(m, "ParamPackConcat") + .def(py::init<>()) + .def_readwrite("offsets", &ParamPackConcat::offsets); + + py::class_, OpDef>(m, "BackwardGraph"); + py::class_, OpDef>(m, "CondTake") + .def(py::init<>()); + + py::class_, OpDef>(m, "NMSKeep") + .def(py::init()) + .def_readwrite("iou_thresh", &NMSKeep::iou_thresh) + .def_readwrite("max_output", &NMSKeep::max_output); +} diff --git a/imperative/python/src/ops.h b/imperative/python/src/ops.h new file mode 100644 index 0000000000000000000000000000000000000000..900b89e1aab14f66968330e6c92f90343c19be61 --- /dev/null +++ b/imperative/python/src/ops.h @@ -0,0 +1,5 @@ +#pragma once + +#include "./helper.h" + +void init_ops(pybind11::module m); diff --git a/imperative/python/src/utils.cpp b/imperative/python/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b0e615a437e63d9aade2a1d17f2c1bc56ec0aa8b --- /dev/null +++ b/imperative/python/src/utils.cpp @@ -0,0 +1,236 @@ +#include "utils.h" + +#include +#include +#include +#include "./imperative_rt.h" +#include "megbrain/common.h" +#include "megbrain/comp_node.h" +#include "megbrain/imperative/blob_manager.h" +#include "megbrain/imperative/profiler.h" +#include "megbrain/serialization/helper.h" + +#if MGB_ENABLE_OPR_MM +#include "megbrain/opr/mm_handler.h" +#endif + +namespace py = pybind11; + +namespace { + +bool g_global_finalized = false; + +class LoggerWrapper { +public: + using LogLevel = mgb::LogLevel; + using LogHandler = mgb::LogHandler; + static void set_log_handler(py::object logger_p) { + logger = logger_p; + mgb::set_log_handler(py_log_handler); + } + static LogLevel set_log_level(LogLevel log_level) { + return mgb::set_log_level(log_level); + } + +private: + static py::object logger; + static void py_log_handler(mgb::LogLevel level, const char* file, + const char* func, int line, const char* fmt, + va_list ap) { + using mgb::LogLevel; + + MGB_MARK_USED_VAR(file); + MGB_MARK_USED_VAR(func); + MGB_MARK_USED_VAR(line); + + if (g_global_finalized) + return; + + const char* py_type; + switch (level) { + case LogLevel::DEBUG: + py_type = "debug"; + break; + case LogLevel::INFO: + py_type = "info"; + break; + case LogLevel::WARN: + py_type = "warning"; + break; + case LogLevel::ERROR: + py_type = "error"; + break; + default: + throw std::runtime_error("bad log level"); + } + + std::string msg = mgb::svsprintf(fmt, ap); + auto do_log = [msg = msg, py_type]() { + if (logger.is_none()) + return; + py::object _call = logger.attr(py_type); + _call(py::str(msg)); + }; + if 
(PyGILState_Check()) { + do_log(); + } else { + py_task_q.add_task(do_log); + } + } +}; +py::object LoggerWrapper::logger = py::none{}; + +uint32_t _get_dtype_num(py::object dtype) { + return static_cast(npy::dtype_np2mgb(dtype.ptr()).enumv()); +} + +py::bytes _get_serialized_dtype(py::object dtype) { + std::string sdtype; + auto write = [&sdtype](const void* data, size_t size) { + auto pos = sdtype.size(); + sdtype.resize(pos + size); + memcpy(&sdtype[pos], data, size); + }; + mgb::serialization::serialize_dtype(npy::dtype_np2mgb(dtype.ptr()), write); + return py::bytes(sdtype.data(), sdtype.size()); +} + +int fork_exec_impl(const std::string& arg0, const std::string& arg1, + const std::string& arg2) { +#ifdef WIN32 + STARTUPINFO si; + PROCESS_INFORMATION pi; + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + ZeroMemory(&pi, sizeof(pi)); + auto args_str = " " + arg1 + " " + arg2; + + // Start the child process. + if (!CreateProcess(arg0.c_str(), // exe name + const_cast(args_str.c_str()), // Command line + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi) // Pointer to PROCESS_INFORMATION structure + ) { + mgb_log_warn("CreateProcess failed (%lu).\n", GetLastError()); + fprintf(stderr, "[megbrain] failed to execl %s [%s, %s]\n", + arg0.c_str(), arg1.c_str(), arg2.c_str()); + __builtin_trap(); + } + return pi.dwProcessId; +#else + auto pid = fork(); + if (!pid) { + execl(arg0.c_str(), arg0.c_str(), arg1.c_str(), arg2.c_str(), nullptr); + fprintf(stderr, "[megbrain] failed to execl %s [%s, %s]: %s\n", + arg0.c_str(), arg1.c_str(), arg2.c_str(), std::strerror(errno)); + std::terminate(); + } + mgb_assert(pid > 0, "failed to fork: %s", std::strerror(errno)); + return pid; +#endif +} + +} // namespace + +void init_utils(py::module m) { + auto atexit = py::module::import("atexit"); + atexit.attr("register")(py::cpp_function([]() { + g_global_finalized = true; + })); + + py::class_>(m, "AtomicUint64") + .def(py::init<>()) + .def(py::init()) + .def("load", + [](const std::atomic& self) { return self.load(); }) + .def("store", [](std::atomic& self, + uint64_t value) { return self.store(value); }) + .def("fetch_add", + [](std::atomic& self, uint64_t value) { + return self.fetch_add(value); + }) + .def("fetch_sub", + [](std::atomic& self, uint64_t value) { + return self.fetch_sub(value); + }) + .def(py::self += uint64_t()) + .def(py::self -= uint64_t()); + + // FIXME!!! 
Should add a submodule instead of using a class for logger + py::class_ logger(m, "Logger"); + logger.def(py::init<>()) + .def_static("set_log_level", &LoggerWrapper::set_log_level) + .def_static("set_log_handler", &LoggerWrapper::set_log_handler); + + py::enum_(logger, "LogLevel") + .value("Debug", LoggerWrapper::LogLevel::DEBUG) + .value("Info", LoggerWrapper::LogLevel::INFO) + .value("Warn", LoggerWrapper::LogLevel::WARN) + .value("Error", LoggerWrapper::LogLevel::ERROR); + + m.def("_get_dtype_num", &_get_dtype_num, + "Convert numpy dtype to internal dtype"); + + m.def("_get_serialized_dtype", &_get_serialized_dtype, + "Convert numpy dtype to internal dtype for serialization"); + + m.def("_get_device_count", &mgb::CompNode::get_device_count, + "Get total number of specific devices on this system"); + + using mgb::imperative::Profiler; + + py::class_(m, "ProfilerImpl") + .def(py::init<>()) + .def(py::init()) + .def("enable", + [](Profiler& profiler) -> Profiler& { + profiler.enable(); + return profiler; + }) + .def("disable", + [](Profiler& profiler) { + if (profiler.get_dump_count() == 0) { + profiler.dump(); + } + profiler.disable(); + }) + .def("dump", + [](Profiler& profiler, std::optional path) { + if (path.has_value()) { + profiler.dump(path.value()); + } else { + profiler.dump(); + } + }, + py::arg("path") = std::optional()); + +#if MGB_ENABLE_OPR_MM + m.def("create_mm_server", &create_zmqrpc_server, py::arg("addr"), + py::arg("port") = 0); +#else + m.def("create_mm_server", []() {}); +#endif + + // Debug code, internal only + m.def("_set_defrag", [](bool enable) { + mgb::imperative::BlobManager::inst()->set_enable(enable); + }); + m.def("_defrag", [](const mgb::CompNode& cn) { + mgb::imperative::BlobManager::inst()->defrag(cn); + }); + m.def("_set_fork_exec_path_for_timed_func", [](const std::string& arg0, + const ::std::string arg1) { + using namespace std::placeholders; + mgb::sys::TimedFuncInvoker::ins().set_fork_exec_impl(std::bind( + fork_exec_impl, std::string{arg0}, std::string{arg1}, _1)); + }); + m.def("_timed_func_exec_cb", [](const std::string& user_data){ + mgb::sys::TimedFuncInvoker::ins().fork_exec_impl_mainloop(user_data.c_str()); + }); +} diff --git a/imperative/python/src/utils.h b/imperative/python/src/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..0a4d69e560401111e3abd2dcb39e9cb77e99bd0c --- /dev/null +++ b/imperative/python/src/utils.h @@ -0,0 +1,5 @@ +#pragma once + +#include "./helper.h" + +void init_utils(pybind11::module m); diff --git a/imperative/python/test/integration/mnist_model_with_test.mge b/imperative/python/test/integration/mnist_model_with_test.mge new file mode 100644 index 0000000000000000000000000000000000000000..126837d41f24151b9bad560e0b496908e1e5e9af Binary files /dev/null and b/imperative/python/test/integration/mnist_model_with_test.mge differ diff --git a/imperative/python/test/integration/mnist_model_with_test_cpu.mge b/imperative/python/test/integration/mnist_model_with_test_cpu.mge new file mode 100644 index 0000000000000000000000000000000000000000..b0e8ad5c98b17584cbbcdf50c395c553ca1f74ef Binary files /dev/null and b/imperative/python/test/integration/mnist_model_with_test_cpu.mge differ diff --git a/imperative/python/test/integration/test_advance_indexing.py b/imperative/python/test/integration/test_advance_indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..261f6daf8110d3adfe586a5c252b4b9259c08b4a --- /dev/null +++ 
b/imperative/python/test/integration/test_advance_indexing.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +import megengine +import megengine.optimizer as optimizer +from megengine import Parameter, tensor +from megengine.module import Module + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.0, dtype=np.float32) + + def forward(self, x, y): + x = x[y] * self.a + return x + + +class Simple2(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.0, dtype=np.float32) + + def forward(self, x): + x = x[1, ..., :, 0:4:2, 0:2] * self.a + return x + + +def test_advance_indexing(): + net = Simple() + + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + dshape = (10, 10) + raw_data = np.arange(100).reshape(dshape).astype(np.float32) + raw_mask = (np.random.random_sample(dshape) > 0.5).astype(np.bool_) + data = tensor(raw_data) + mask = tensor(raw_mask) + answer = 1.0 - raw_data[raw_mask].sum() + with optim.record(): + loss = net(data, mask).sum() + optim.backward(loss) + optim.step() + np.testing.assert_almost_equal(net.a.numpy(), np.array([answer]).astype(np.float32)) + + +def test_advance_indexing_with_subtensor(): + net = Simple2() + + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + dshape = (2, 3, 4, 3, 4, 2) + raw_data = np.arange(576).reshape(dshape).astype(np.float32) + data = tensor(raw_data) + answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum() + with optim.record(): + loss = net(data).sum() + optim.backward(loss) + optim.step() + np.testing.assert_almost_equal(net.a.numpy(), np.array([answer]).astype(np.float32)) diff --git a/imperative/python/test/integration/test_ai.py b/imperative/python/test/integration/test_ai.py new file mode 100644 index 0000000000000000000000000000000000000000..3e40bac9271b6894fdb3e4333794b6d9664396e8 --- /dev/null +++ b/imperative/python/test/integration/test_ai.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
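+# Gradient check through slicing: loss = sum(x[:, 0] * a) gives dloss/da = 10
+# for the all-ones (10, 10) input below, so one SGD step with lr=1.0 moves a
+# from 1 to 1 - dshape[0] = -9.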
+import numpy as np + +import megengine +import megengine.optimizer as optimizer +from megengine import Parameter, tensor +from megengine.module import Module + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.0, dtype=np.float32) + + def forward(self, x): + x = x[:, 0] * self.a + return x + + +def test_ai(): + net = Simple() + + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + dshape = (10, 10) + data = tensor(np.ones(dshape).astype(np.float32)) + with optim.record(): + loss = net(data).sum() + optim.backward(loss) + optim.step() + np.testing.assert_almost_equal( + net.a.numpy(), np.array([1.0 - dshape[0]]).astype(np.float32) + ) diff --git a/imperative/python/test/integration/test_bn.py b/imperative/python/test/integration/test_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..779b2ef9e77b4900e024d5eb81d6085f6e3427e1 --- /dev/null +++ b/imperative/python/test/integration/test_bn.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np +import pytest + +import megengine +import megengine.optimizer as optimizer +from megengine import Parameter, tensor +from megengine.module import BatchNorm2d + + +def test_frozen_bn(): + nchannel = 3 + m = BatchNorm2d(nchannel, freeze=True) + + saved_var = m.running_var.numpy() + saved_mean = m.running_mean.numpy() + saved_wt = m.weight.numpy() + saved_bias = m.bias.numpy() + + optim = optimizer.SGD(m.parameters(), lr=1.0) + optim.zero_grad() + + data = np.random.random((6, nchannel, 2, 2)).astype("float32") + with optim.record(): + loss = m(data).mean() + optim.backward(loss) + optim.step() + + np.testing.assert_equal(m.running_var.numpy(), saved_var) + np.testing.assert_equal(m.running_mean.numpy(), saved_mean) + np.testing.assert_equal(m.weight.numpy(), saved_wt) + np.testing.assert_equal(m.bias.numpy(), saved_bias) + np.testing.assert_almost_equal(loss.numpy(), data.mean(), 5) + + +def test_bn_no_track_stat(): + nchannel = 3 + m = BatchNorm2d(nchannel, track_running_stats=False) + + optim = optimizer.SGD(m.parameters(), lr=1.0) + optim.zero_grad() + + data = np.random.random((6, nchannel, 2, 2)).astype("float32") + with optim.record(): + loss = m(data).sum() + optim.backward(loss) + optim.step() + + +def test_bn_no_track_stat2(): + nchannel = 3 + m = BatchNorm2d(nchannel) # Init with track_running_stat = True + m.track_running_stats = False + + # m.running_var and m.running_mean created during init time + saved_var = m.running_var.numpy() + assert saved_var is not None + saved_mean = m.running_mean.numpy() + assert saved_mean is not None + + optim = optimizer.SGD(m.parameters(), lr=1.0) + optim.zero_grad() + + data = np.random.random((6, nchannel, 2, 2)).astype("float32") + with optim.record(): + loss = m(data).sum() + optim.backward(loss) + optim.step() + + np.testing.assert_equal(m.running_var.numpy(), saved_var) + np.testing.assert_equal(m.running_mean.numpy(), saved_mean) + + +def test_bn_no_track_stat3(): + nchannel = 3 + m = BatchNorm2d(nchannel, track_running_stats=False) + m.track_running_stats = True + data = np.random.random((6, nchannel, 2, 2)).astype("float32") + with 
pytest.raises(Exception): + m(data) diff --git a/imperative/python/test/integration/test_converge.py b/imperative/python/test/integration/test_converge.py new file mode 100644 index 0000000000000000000000000000000000000000..7778c6a9eb1cad7a51ef496f703b1c645a3ec4f4 --- /dev/null +++ b/imperative/python/test/integration/test_converge.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import itertools + +import numpy as np +import pytest + +import megengine as mge +import megengine.functional as F +from megengine import Tensor +from megengine.module import Linear, Module +from megengine.optimizer import SGD + +batch_size = 64 +data_shape = (batch_size, 2) +label_shape = (batch_size,) + + +def minibatch_generator(): + while True: + inp_data = np.zeros((batch_size, 2)) + label = np.zeros(batch_size, dtype=np.int32) + for i in range(batch_size): + # [x0, x1], sampled from U[-1, 1] + inp_data[i, :] = np.random.rand(2) * 2 - 1 + label[i] = 0 if np.prod(inp_data[i]) < 0 else 1 + yield inp_data.astype(np.float32), label.astype(np.int32) + + +def calculate_precision(data: np.ndarray, pred: np.ndarray) -> float: + """ Calculate precision for given data and prediction. + + :type data: [[x, y], ...] + :param data: Input data + :type pred: [[x_pred, y_pred], ...] + :param pred: Network output data + """ + correct = 0 + assert len(data) == len(pred) + for inp_data, pred_output in zip(data, pred): + label = 0 if np.prod(inp_data) < 0 else 1 + pred_label = np.argmax(pred_output) + if pred_label == label: + correct += 1 + return float(correct) / len(data) + + +class XORNet(Module): + def __init__(self): + self.mid_layers = 14 + self.num_class = 2 + super().__init__() + + self.fc0 = Linear(self.num_class, self.mid_layers, bias=True) + self.fc1 = Linear(self.mid_layers, self.mid_layers, bias=True) + + self.fc2 = Linear(self.mid_layers, self.num_class, bias=True) + + def forward(self, x): + x = self.fc0(x) + x = F.tanh(x) + x = self.fc1(x) + x = F.tanh(x) + x = self.fc2(x) + return x + + +def test_training_converge(): + net = XORNet() + opt = SGD( + net.parameters(requires_grad=True), lr=0.01, momentum=0.9, weight_decay=5e-4 + ) + + def train(data, label): + with opt.record(): + pred = net(data) + loss = F.cross_entropy_with_softmax(pred, label) + opt.backward(loss) + return loss + + def infer(data): + return net(data) + + train_dataset = minibatch_generator() + losses = [] + + for data, label in itertools.islice(train_dataset, 2000): + data = Tensor(data, dtype=np.float32) + label = Tensor(label, dtype=np.int32) + opt.zero_grad() + loss = train(data, label) + opt.step() + losses.append(loss.numpy()) + + assert np.mean(losses[-100:]) < 0.1, "Final training Loss must be low enough" + + ngrid = 10 + x = np.linspace(-1.0, 1.0, ngrid) + xx, yy = np.meshgrid(x, x) + xx = xx.reshape((ngrid * ngrid, 1)) + yy = yy.reshape((ngrid * ngrid, 1)) + data = np.concatenate((xx, yy), axis=1).astype(np.float32) + + pred = infer(data).numpy() + precision = calculate_precision(data, pred) + assert precision == 1.0, "Test precision must be high enough, get {}".format( + precision + ) diff --git a/imperative/python/test/integration/test_correctness.py 
b/imperative/python/test/integration/test_correctness.py new file mode 100644 index 0000000000000000000000000000000000000000..73d3fbed247021c1a708a5645bd8814f09144bb2 --- /dev/null +++ b/imperative/python/test/integration/test_correctness.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import os +import re +import subprocess +import sys + +import numpy as np +import pytest + +import megengine as mge +import megengine.functional as F +from megengine.functional.debug_param import set_conv_execution_strategy +from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module +from megengine.optimizer import SGD +from megengine.tensor import Tensor +from megengine.test import assertTensorClose + + +def get_gpu_name(): + try: + gpu_info = subprocess.check_output( + ["nvidia-smi", "--query-gpu=gpu_name", "--format=csv,noheader"] + ) + gpu_info = gpu_info.decode("ascii").split("\n")[0] + except: + gpu_info = "None" + return gpu_info + + +def get_cpu_name(): + cpu_info = "None" + try: + cpu_info = subprocess.check_output(["cat", "/proc/cpuinfo"]).decode("ascii") + for line in cpu_info.split("\n"): + if "model name" in line: + return re.sub(".*model name.*:", "", line, 1).strip() + except: + pass + return cpu_info + + +def get_xpu_name(): + if mge.is_cuda_available(): + return get_gpu_name() + else: + return get_cpu_name() + + +class MnistNet(Module): + def __init__(self, has_bn=False): + super().__init__() + self.conv0 = Conv2d(1, 20, kernel_size=5, bias=True) + self.pool0 = AvgPool2d(2) + self.conv1 = Conv2d(20, 20, kernel_size=5, bias=True) + self.pool1 = AvgPool2d(2) + self.fc0 = Linear(20 * 4 * 4, 500, bias=True) + self.fc1 = Linear(500, 10, bias=True) + self.bn0 = None + self.bn1 = None + if has_bn: + self.bn0 = BatchNorm2d(20) + self.bn1 = BatchNorm2d(20) + + def forward(self, x): + x = self.conv0(x) + if self.bn0: + x = self.bn0(x) + x = F.relu(x) + x = self.pool0(x) + x = self.conv1(x) + if self.bn1: + x = self.bn1(x) + x = F.relu(x) + x = self.pool1(x) + x = F.flatten(x, 1) + x = self.fc0(x) + x = F.relu(x) + x = self.fc1(x) + return x + + +def train(data, label, net, opt): + with opt.record(): + pred = net(data) + loss = F.cross_entropy_with_softmax(pred, label) + opt.backward(loss) + return loss + + +def update_model(model_path): + """ + Update the dumped model with test cases for new reference values. + + The model with pre-trained weights is trained for one iter with the test data attached. + The loss and updated net state dict is dumped. + + .. 
code-block:: python + + from test_correctness import update_model + update_model('mnist_model_with_test.mge') # for gpu + update_model('mnist_model_with_test_cpu.mge') # for cpu + + """ + net = MnistNet(has_bn=True) + checkpoint = mge.load(model_path) + net.load_state_dict(checkpoint["net_init"]) + lr = checkpoint["sgd_lr"] + opt = SGD(net.parameters(), lr=lr) + + data = Tensor(checkpoint["data"], dtype=np.float32) + label = Tensor(checkpoint["label"], dtype=np.int32) + + opt.zero_grad() + loss = train(data, label, net=net, opt=opt) + opt.step() + + xpu_name = get_xpu_name() + + checkpoint.update( + {"net_updated": net.state_dict(), "loss": loss.numpy(), "xpu": xpu_name} + ) + mge.save(checkpoint, model_path) + + +def run_test( + model_path, use_jit, use_symbolic, sublinear_memory_config=None, max_err=None, +): + + """ + Load the model with test cases and run the training for one iter. + The loss and updated weights are compared with reference value to verify the correctness. + + Dump a new file with updated result by calling update_model + if you think the test fails due to numerical rounding errors instead of bugs. + Please think twice before you do so. + + """ + net = MnistNet(has_bn=True) + checkpoint = mge.load(model_path) + net.load_state_dict(checkpoint["net_init"]) + lr = checkpoint["sgd_lr"] + opt = SGD(net.parameters(), lr=lr) + + data = Tensor(checkpoint["data"], dtype=np.float32) + label = Tensor(checkpoint["label"], dtype=np.int32) + + if max_err is None: + max_err = 1e-5 + + train_func = train + if use_jit: + train_func = jit.trace( + train_func, + symbolic=use_symbolic, + sublinear_memory_config=sublinear_memory_config, + ) + + opt.zero_grad() + loss = train_func(data, label, net=net, opt=opt) + opt.step() + + assertTensorClose(loss.numpy(), checkpoint["loss"], max_err=max_err) + + for param, param_ref in zip( + net.state_dict().items(), checkpoint["net_updated"].items() + ): + assert param[0] == param_ref[0] + assertTensorClose(param[1], param_ref[1], max_err=max_err) + + +def test_correctness(): + if mge.is_cuda_available(): + model_name = "mnist_model_with_test.mge" + else: + model_name = "mnist_model_with_test_cpu.mge" + model_path = os.path.join(os.path.dirname(__file__), model_name) + set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE") + + run_test(model_path, False, False, max_err=1e-5) + # run_test(model_path, True, False) + # run_test(model_path, True, True) + + # sublinear + # config = SublinearMemoryConfig(genetic_nr_iter=10) + # run_test( + # model_path, True, True, sublinear_memory_config=config, max_err=1e-5, + # ) diff --git a/imperative/python/test/integration/test_detach.py b/imperative/python/test/integration/test_detach.py new file mode 100644 index 0000000000000000000000000000000000000000..0d0b3d5c7fbed7ed9ed9c3d6268d031b1abe99a0 --- /dev/null +++ b/imperative/python/test/integration/test_detach.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
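Reviewer note: two caveats on `run_test` above. First, the `use_jit=True` path references `jit.trace`, but the module never imports `megengine.jit`, so only the `use_jit=False` call in `test_correctness` is currently runnable (the jit and sublinear invocations are commented out accordingly). Second, the state-dict comparison zips two `.items()` iterators and therefore relies on both dicts having identical key order. A stricter, key-based variant is sketched below; `state` and `ref` are hypothetical stand-ins for `net.state_dict()` and `checkpoint["net_updated"]`:

```python
# Sketch only: compare state dicts by key instead of by iteration order.
# `assert_close` stands for megengine.test.assertTensorClose.
def compare_state_dicts(state, ref, assert_close, max_err=1e-5):
    assert set(state.keys()) == set(ref.keys()), "state dict keys diverged"
    for name in sorted(state):
        assert_close(state[name], ref[name], max_err=max_err)
```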
+import numpy as np + +import megengine +import megengine.optimizer as optimizer +from megengine import Parameter, tensor +from megengine.module import Module + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.0, dtype=np.float32) + self.b = Parameter(1.0, dtype=np.float32) + + def forward(self, x): + x = x * self.a + x = x.detach() * self.b + return x + + +def test_detach(): + net = Simple() + + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + dshape = (10, 10) + data = tensor(np.ones(dshape).astype(np.float32)) + with optim.record(): + loss = net(data).sum() + optim.backward(loss) + optim.step() + np.testing.assert_equal(net.a.numpy(), np.array([1.0]).astype(np.float32)) + np.testing.assert_equal( + net.b.numpy(), np.array([1.0 - 10.0 * 10.0]).astype(np.float32) + ) diff --git a/imperative/python/test/integration/test_dp_correctness.py b/imperative/python/test/integration/test_dp_correctness.py new file mode 100644 index 0000000000000000000000000000000000000000..5719136942cced84a8e17f0bc0351f1b5d5c618c --- /dev/null +++ b/imperative/python/test/integration/test_dp_correctness.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import multiprocessing as mp +import os +import re +import subprocess +import sys +from math import ceil + +import numpy as np +import pytest + +import megengine as mge +import megengine.distributed as dist +import megengine.functional as F +from megengine.device import get_default_device, set_default_device +from megengine.functional.debug_param import set_conv_execution_strategy +from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module +from megengine.optimizer import SGD +from megengine.tensor import Tensor +from megengine.test import assertTensorClose + +p_num = 4 + + +def get_gpu_name(): + try: + gpu_info = subprocess.check_output( + ["nvidia-smi", "--query-gpu=gpu_name", "--format=csv,noheader"] + ) + gpu_info = gpu_info.decode("ascii").split("\n")[0] + except: + gpu_info = "None" + return gpu_info + + +def get_cpu_name(): + cpu_info = "None" + try: + cpu_info = subprocess.check_output(["cat", "/proc/cpuinfo"]).decode("ascii") + for line in cpu_info.split("\n"): + if "model name" in line: + return re.sub(".*model name.*:", "", line, 1).strip() + except: + pass + return cpu_info + + +def get_xpu_name(): + if mge.is_cuda_available(): + return get_gpu_name() + else: + return get_cpu_name() + + +class MnistNet(Module): + def __init__(self, has_bn=True): + super().__init__() + self.conv0 = Conv2d(1, 20, kernel_size=5, bias=True) + self.pool0 = AvgPool2d(2) + self.conv1 = Conv2d(20, 20, kernel_size=5, bias=True) + self.pool1 = AvgPool2d(2) + self.fc0 = Linear(20 * 4 * 4, 500, bias=True) + self.fc1 = Linear(500, 10, bias=True) + self.bn0 = None + self.bn1 = None + if has_bn: + self.bn0 = BatchNorm2d(20) + self.bn1 = BatchNorm2d(20) + + def forward(self, x): + x = self.conv0(x) + if self.bn0: + x = self.bn0(x) + x = F.relu(x) + x = self.pool0(x) + x = self.conv1(x) + if self.bn1: + x = self.bn1(x) + x = F.relu(x) + x = self.pool1(x) + x = F.flatten(x, 1) + x = self.fc0(x) + x = F.relu(x) + x = self.fc1(x) + return x + + +def train(data, 
label, net, opt): + with opt.record(): + pred = net(data) + loss = F.cross_entropy_with_softmax(pred, label) + opt.backward(loss) + return loss + + +def update_model(model_path): + """ + Update the dumped model with test cases for new reference values. + + The model with pre-trained weights is trained for one iter with the test data attached. + The loss and updated net state dict is dumped. + + .. code-block:: python + + from test_correctness import update_model + update_model('mnist_model_with_test.mge') # for gpu + update_model('mnist_model_with_test_cpu.mge') # for cpu + + """ + net = MnistNet(has_bn=True) + checkpoint = mge.load(model_path) + net.load_state_dict(checkpoint["net_init"]) + lr = checkpoint["sgd_lr"] + opt = SGD(net.parameters(), lr=lr) + + data = Tensor(checkpoint["data"], dtype=np.float32) + label = Tensor(checkpoint["label"], dtype=np.int32) + + opt.zero_grad() + loss = train(data, label, net=net, opt=opt) + opt.step() + + xpu_name = get_xpu_name() + + checkpoint.update( + {"net_updated": net.state_dict(), "loss": loss.numpy(), "xpu": xpu_name} + ) + mge.serialization.save(checkpoint, model_path) + + +def run_test( + model_path, use_jit, use_symbolic, sublinear_memory_config=None, max_err=None, +): + + """ + Load the model with test cases and run the training for one iter. + The loss and updated weights are compared with reference value to verify the correctness. + + Dump a new file with updated result by calling update_model + if you think the test fails due to numerical rounding errors instead of bugs. + Please think twice before you do so. + + """ + checkpoint = mge.load(model_path) + data = checkpoint["data"] + label = checkpoint["label"] + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, max_err): + dist.init_process_group("localhost", port, p_num, rank, rank) + set_default_device(device="gpu{}".format(dist.get_rank())) + net = MnistNet(has_bn=True) + net.load_state_dict(checkpoint["net_init"]) + lr = checkpoint["sgd_lr"] + opt = SGD(net.parameters(), reduce_method="mean", lr=lr) + + # use same data and label for all gpu's + # such that the result does not depend on number of gpu + data_train = Tensor(data) + label_train = Tensor(label) + + train_func = train + + opt.zero_grad() + loss = train_func(data_train, label_train, net=net, opt=opt) + opt.step() + + print("{} loss {}".format(get_default_device(), loss.numpy()[0])) + assertTensorClose(loss.numpy(), checkpoint["loss"], max_err=max_err) + + if dist.get_rank(): + return + for param, param_ref in zip( + net.state_dict().items(), checkpoint["net_updated"].items() + ): + assert param[0] == param_ref[0] + assertTensorClose(param[1], param_ref[1], max_err=max_err) + + procs = [] + for rank in range(p_num): + p = mp.Process(target=worker, args=(rank, max_err,)) + p.start() + procs.append(p) + + for p in procs: + p.join(20) + assert p.exitcode == 0 + + +@pytest.mark.isolated_distributed +def test_dp_correctness(): + model_name = "mnist_model_with_test.mge" + model_path = os.path.join(os.path.dirname(__file__), model_name) + set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE") + run_test(model_path, False, False, max_err=1e-5) diff --git a/imperative/python/test/integration/test_hello_world.py b/imperative/python/test/integration/test_hello_world.py new file mode 100644 index 0000000000000000000000000000000000000000..033d28544b6a1254163abb598382ec68e4849e88 --- /dev/null +++ b/imperative/python/test/integration/test_hello_world.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# MegEngine 
is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import subprocess + +import numpy as np +import pytest + +import megengine +import megengine.optimizer as optimizer +from megengine import Parameter, tensor +from megengine.module import Module + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.23, dtype=np.float32) + + def forward(self, x): + x = x * self.a + return x + + +def test_hello_world(): + net = Simple() + + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + data = tensor([2.34]) + with optim.record(): + loss = net(data) + optim.backward(loss) + optim.step() + np.testing.assert_almost_equal( + net.a.numpy(), np.array([1.23 - 2.34]).astype(np.float32) + ) diff --git a/imperative/python/test/integration/test_lr_scheduler.py b/imperative/python/test/integration/test_lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..a0f788f6887a946de5c12da526cbf32ce3f18e6c --- /dev/null +++ b/imperative/python/test/integration/test_lr_scheduler.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from bisect import bisect_right + +import numpy as np + +from megengine import Parameter, tensor +from megengine.module import Module +from megengine.optimizer import SGD, MultiStepLR + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.23, dtype=np.float32) + + def forward(self, x): + x = x * self.a + return x + + +def test_multi_step_lr(): + net = Simple() + opt = SGD(net.parameters(), lr=0.01, momentum=0.9) + scheduler = MultiStepLR(opt, [3, 6, 8]) + + lr = np.array(0.01, dtype=np.float32) + for i in range(10): + for group in opt.param_groups: + np.testing.assert_almost_equal( + np.array(group["lr"], dtype=np.float32), + (lr * 0.1 ** bisect_right([3, 6, 8], i)).astype(np.float32), + ) + scheduler.step() diff --git a/imperative/python/test/integration/test_optimizer.py b/imperative/python/test/integration/test_optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..388a881485e4e63e69a6c84b0410ac430277c50a --- /dev/null +++ b/imperative/python/test/integration/test_optimizer.py @@ -0,0 +1,206 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
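Reviewer note: `test_multi_step_lr` above pins down the MultiStepLR contract: at epoch `i`, the learning rate equals the base rate times `0.1` raised to the number of milestones already passed, which `bisect_right` counts. A plain-Python sketch of the schedule the assertions encode (the `0.1` decay factor is the default the test assumes):

```python
from bisect import bisect_right

def expected_lr(base_lr, milestones, epoch, gamma=0.1):
    # Decay once per milestone whose value is <= the current epoch.
    return base_lr * gamma ** bisect_right(milestones, epoch)

assert abs(expected_lr(0.01, [3, 6, 8], 2) - 0.01) < 1e-12   # before any milestone
assert abs(expected_lr(0.01, [3, 6, 8], 3) - 0.001) < 1e-12  # after the first
assert abs(expected_lr(0.01, [3, 6, 8], 9) - 1e-5) < 1e-12   # after all three
```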
+import numpy as np + +import megengine.functional as F +from megengine import Parameter, optimizer +from megengine.module import Linear, Module +from megengine.tensor import TensorDict, tensor + + +class MLP(Module): + def __init__(self): + super().__init__() + self.dense0 = Linear(28, 50) + self.dense1 = Linear(50, 20) + + def forward(self, x): + x = self.dense0(x) + x = F.relu(x) + x = self.dense1(x) + return x + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.23, dtype=np.float32) + + def forward(self, x): + x = x * self.a + return x + + +def _test_optimizer(opt_str, test_case, check_class, update_lr=False): + iter_num = 3 + net = Simple() + opt = getattr(optimizer, opt_str)(net.parameters(), **test_case) + check_func = check_class(net, **test_case) + + step = 0 + data_shape = (2, 28) + + for i in range(iter_num): + if update_lr and i == 1: # change learning rate + for group in opt.param_groups: + group["lr"] += 0.01 + check_func.lr += 0.01 + data = tensor(np.random.random(data_shape).astype(np.float32)) + + opt.zero_grad() + with opt.record(): + pred = net(data) + loss = pred.sum() + opt.backward(loss) + + ori_params = TensorDict() + for param in net.parameters(): + ori_params[param] = np.copy(param.numpy()) + opt.step() + step += 1 + check_func(ori_params, net.parameters(), step) + + +def test_sgd(): + class CheckValue: + def __init__(self, net, **kwarg): + self.slots = TensorDict() + for param in net.parameters(): + self.slots[param] = np.zeros(param.shape).astype(np.float32) + for k, v in kwarg.items(): + setattr(self, k, v) + + def __call__(self, ori_params, new_params, step): + for param in new_params: + grad = param.grad.numpy() + if hasattr(self, "momentum"): + self.slots[param] = grad + self.slots[param] * self.momentum + delta = -self.lr * self.slots[param] + else: + delta = -self.lr * grad + np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta) + + cases = [ + {"momentum": 0.9, "lr": 0.01}, # SGD with momentum + {"lr": 0.01}, # simple SGD + {"weight_decay": 0.1, "lr": 0.01}, # with weight_decay + ] + for case in cases: + _test_optimizer("SGD", case, CheckValue) + _test_optimizer("SGD", case, CheckValue, update_lr=True) + + +def test_adam(): + class CheckValue: + def __init__(self, net, **kwarg): + self.m_slots = TensorDict() + self.v_slots = TensorDict() + for param in net.parameters(): + self.m_slots[param] = np.zeros(param.shape).astype(np.float32) + self.v_slots[param] = np.zeros(param.shape).astype(np.float32) + for k, v in kwarg.items(): + setattr(self, k, v) + + def __call__(self, ori_params, new_params, step): + for param in new_params: + grad = param.grad.numpy() + m = self.m_slots[param] + v = self.v_slots[param] + m *= self.betas[0] + m += (1 - self.betas[0]) * grad + v *= self.betas[1] + v += (1 - self.betas[1]) * grad * grad + delta = (m / (1 - self.betas[0] ** step)) / ( + np.sqrt(v / (1 - self.betas[1] ** step)) + self.eps + ) + np.testing.assert_almost_equal( + param.numpy(), ori_params[param] - self.lr * delta + ) + + cases = [ + {"betas": (0.8, 0.9), "eps": 1e-04, "lr": 0.01}, + { + "betas": (0.8, 0.9), + "eps": 1e-04, + "lr": 0.01, + "weight_decay": 0.1, + }, # with weight_decay + ] + for case in cases: + _test_optimizer("Adam", case, CheckValue) + _test_optimizer("Adam", case, CheckValue, update_lr=True) + + +def test_adagrad(): + class CheckValue: + def __init__(self, net, **kwarg): + self.s_slots = TensorDict() + for param in net.parameters(): + self.s_slots[param] = 
np.zeros(param.shape).astype(np.float32) + for k, v in kwarg.items(): + setattr(self, k, v) + + def __call__(self, ori_params, new_params, step): + for param in new_params: + grad = param.grad.numpy() + self.s_slots[param] += grad ** 2 + delta = grad / (self.s_slots[param] + self.eps) ** 0.5 + delta *= -(self.lr / (1 + (step - 1) * self.lr_decay)) + np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta) + + cases = [ + {"lr": 0.01, "eps": 1e-06, "lr_decay": 0.01}, + {"lr": 0.01, "eps": 1e-06, "lr_decay": 0.0}, # without lr_decay + { + "lr": 0.01, + "eps": 1e-06, + "lr_decay": 0.01, + "weight_decay": 0.1, + }, # with weight_decay + ] + for case in cases: + _test_optimizer("Adagrad", case, CheckValue) + _test_optimizer("Adagrad", case, CheckValue, update_lr=True) + + +def test_adadelta(): + class CheckValue: + def __init__(self, net, **kwarg): + self.s_slots = TensorDict() + self.a_slots = TensorDict() + for param in net.parameters(): + self.s_slots[param] = np.zeros(param.shape).astype(np.float32) + self.a_slots[param] = np.zeros(param.shape).astype(np.float32) + for k, v in kwarg.items(): + setattr(self, k, v) + + def __call__(self, ori_params, new_params, step): + for param in new_params: + grad = param.grad.numpy() + self.s_slots[param] = self.s_slots[param] * self.rho + grad ** 2 * ( + 1 - self.rho + ) + delta = ( + grad + * ((self.a_slots[param] + self.eps) ** 0.5) + / (self.s_slots[param] + self.eps) ** 0.5 + ) + self.a_slots[param] = self.a_slots[param] * self.rho + delta ** 2 * ( + 1 - self.rho + ) + delta *= -self.lr + np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta) + + cases = [ + {"lr": 1.0, "eps": 1e-06, "rho": 0.9}, + {"lr": 1.0, "eps": 1e-06, "rho": 0.9, "weight_decay": 0.9}, # with weight_decay + ] + for case in cases: + _test_optimizer("Adadelta", case, CheckValue) + _test_optimizer("Adadelta", case, CheckValue, update_lr=True) diff --git a/imperative/python/test/integration/test_save_load.py b/imperative/python/test/integration/test_save_load.py new file mode 100644 index 0000000000000000000000000000000000000000..11bbcf58a69dd2147a7c9b8afcc36a70884ee0eb --- /dev/null +++ b/imperative/python/test/integration/test_save_load.py @@ -0,0 +1,58 @@ +import numpy as np + +import megengine as mge +import megengine.optimizer as optimizer +from megengine import Parameter, tensor +from megengine.core.tensor.raw_tensor import RawTensor +from megengine.module import Module + + +class Simple(Module): + def __init__(self): + self.a = Parameter(1.23, dtype=np.float32) + + def forward(self, x): + x = x * self.a + return x + + +def test_save_load(): + net = Simple() + + optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9) + optim.zero_grad() + + data = tensor([2.34]) + + with optim.record(): + loss = net(data) + optim.backward(loss) + + optim.step() + + model_name = "simple.pkl" + print("save to {}".format(model_name)) + + mge.save( + { + "name": "simple", + "state_dict": net.state_dict(), + "opt_state": optim.state_dict(), + }, + model_name, + ) + + # Load param to cpu + checkpoint = mge.load(model_name, map_location="cpu0") + mge.set_default_device("cpu0") + net = Simple() + net.load_state_dict(checkpoint["state_dict"]) + optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9) + optim.load_state_dict(checkpoint["opt_state"]) + print("load done") + + with optim.record(): + loss = net([1.23]) + optim.backward(loss) + + optim.step() diff --git a/imperative/python/test/integration/test_sgd_momentum.py 
b/imperative/python/test/integration/test_sgd_momentum.py new file mode 100644 index 0000000000000000000000000000000000000000..33944150e1de1cdebb37a3a0eb1e37a688f54fce --- /dev/null +++ b/imperative/python/test/integration/test_sgd_momentum.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +import megengine +import megengine.optimizer as optimizer +from megengine import Parameter, tensor +from megengine.module import Module + + +class Simple(Module): + def __init__(self): + super().__init__() + self.a = Parameter(1.23, dtype=np.float32) + + def forward(self, x): + x = x * self.a + return x + + +def test_sgd_momentum(): + net = Simple() + + optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9) + optim.zero_grad() + + data = tensor([2.34]) + + # do a step of train + with optim.record(): + loss = net(data) + optim.backward(loss) + optim.step() + + np.testing.assert_almost_equal(optim._state[net.a]["momentum_buffer"].numpy(), 2.34) + + # do a step of infer + loss = net(data) + np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5) + + np.testing.assert_almost_equal(optim._state[net.a]["momentum_buffer"].numpy(), 2.34) + + # do a step of train + optim.zero_grad() + with optim.record(): + loss = net(data) + optim.backward(loss) + optim.step() + + np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5) + np.testing.assert_almost_equal( + optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34 + ) diff --git a/imperative/python/test/pytest.ini b/imperative/python/test/pytest.ini new file mode 100644 index 0000000000000000000000000000000000000000..da914aaacff993b4fa4723aeb86ad65af70fe953 --- /dev/null +++ b/imperative/python/test/pytest.ini @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +[pytest] +markers = + isolated_distributed: marks distributed tests that should run without CUDA use + in the main thread (deselect with '-m "not isolated_distributed"') diff --git a/imperative/python/test/unit/functional/__init__.py b/imperative/python/test/unit/functional/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1207b5d98cd3578bc39e9ce600a1254a434880c8 --- /dev/null +++ b/imperative/python/test/unit/functional/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
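Reviewer note: the buffer values asserted in `test_sgd_momentum` above come from the standard momentum recurrence `buf <- momentum * buf + grad`. Here `loss = x * a` with `x = 2.34`, so the gradient w.r.t. `a` is `2.34` on every recorded step, and the pure-inference forward pass in the middle must leave the buffer untouched. A dependency-free sketch of the bookkeeping:

```python
momentum, grad = 0.9, 2.34
buf = 0.0
buf = momentum * buf + grad  # after the first optim.step(): 2.34
assert abs(buf - 2.34) < 1e-6
# The inference-only forward pass records no gradient and skips the update.
buf = momentum * buf + grad  # after the second optim.step(): 0.9 * 2.34 + 2.34
assert abs(buf - (0.9 * 2.34 + 2.34)) < 1e-6
```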
diff --git a/imperative/python/test/unit/functional/test_distributed.py b/imperative/python/test/unit/functional/test_distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..9ff2031907b51240faccb2ea30dd23619bb88d41 --- /dev/null +++ b/imperative/python/test/unit/functional/test_distributed.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import multiprocessing as mp +import platform + +import numpy as np +import pytest + +import megengine as mge +import megengine.distributed as dist +from megengine import Parameter, Tensor, tensor +from megengine.functional.distributed import ( + all_gather, + all_reduce_max, + all_reduce_min, + all_reduce_sum, + all_to_all, + broadcast, + gather, + reduce_scatter_sum, + reduce_sum, + remote_recv, + remote_send, + scatter, +) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_reduce_sum(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = reduce_sum(inp) + if rank == 0: + assert np.allclose(output.numpy(), expect) + else: + assert np.allclose(output.numpy(), 0) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + z = x + y + p0 = mp.Process(target=worker, args=(0, x, z, port)) + p1 = mp.Process(target=worker, args=(1, y, None, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (99, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_broadcast(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = broadcast(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = x + 1 + p0 = mp.Process(target=worker, args=(0, x, x, port)) + p1 = mp.Process(target=worker, args=(1, y, x, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (99, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_all_gather(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def 
worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = all_gather(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + z = np.concatenate((x, y)) + p0 = mp.Process(target=worker, args=(0, x, z, port)) + p1 = mp.Process(target=worker, args=(1, y, z, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (99, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_reduce_scatter_sum(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = reduce_scatter_sum(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + z = x + y + p0 = mp.Process(target=worker, args=(0, x, z[: shape[0] // 2], port)) + p1 = mp.Process(target=worker, args=(1, y, z[shape[0] // 2 :], port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 4), (8, 10), (88, 44)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_all_reduce_sum(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = all_reduce_sum(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + z = x + y + p0 = mp.Process(target=worker, args=(0, x, z, port)) + p1 = mp.Process(target=worker, args=(1, y, z, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (99, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_all_reduce_max(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = all_reduce_max(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + z = np.maximum(x, y) + p0 = mp.Process(target=worker, args=(0, 
x, z, port)) + p1 = mp.Process(target=worker, args=(1, y, z, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (99, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_all_reduce_min(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = all_reduce_min(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + z = np.minimum(x, y) + p0 = mp.Process(target=worker, args=(0, x, z, port)) + p1 = mp.Process(target=worker, args=(1, y, z, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (99, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_gather(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = gather(inp) + if rank == 0: + assert np.allclose(output.numpy(), expect) + else: + assert np.allclose(output.numpy(), 0) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + z = np.concatenate((x, y)) + p0 = mp.Process(target=worker, args=(0, x, z, port)) + p1 = mp.Process(target=worker, args=(1, y, None, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (99, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_scatter(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = scatter(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = x + 1 + p0 = mp.Process(target=worker, args=(0, x, x[: shape[0] // 2], port)) + p1 = mp.Process(target=worker, args=(1, y, x[shape[0] // 2 :], port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (100, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows 
now" +) +@pytest.mark.isolated_distributed +def test_all_to_all(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, data, expect, port): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + inp = tensor(data) + output = all_to_all(inp) + assert np.allclose(output.numpy(), expect) + + def check(shape): + x = np.random.rand(*shape).astype("float32") + y = np.random.rand(*shape).astype("float32") + a = np.concatenate((x[: shape[0] // 2], y[: shape[0] // 2])) + b = np.concatenate((x[shape[0] // 2 :], y[shape[0] // 2 :])) + p0 = mp.Process(target=worker, args=(0, x, a, port)) + p1 = mp.Process(target=worker, args=(1, y, b, port)) + + p0.start() + p1.start() + + p0.join(10) + p1.join(10) + + assert p0.exitcode == 0 and p1.exitcode == 0 + + for shape in [(2, 3), (8, 10), (100, 77)]: + check(shape) + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_io_remote(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + val = np.random.rand(4, 5).astype(np.float32) + + def worker(rank): + if mge.get_device_count("gpu") < world_size: + return + if rank == 0: # remote send + dist.init_process_group("localhost", port, world_size, rank, rank) + x = Tensor(val, device="gpu0") + y = remote_send(x, 1) + assert y.numpy()[0] == 0 + else: # remote recv + dist.init_process_group("localhost", port, world_size, rank, rank) + y = remote_recv(0, val.shape, val.dtype, cn="gpu1") + np.testing.assert_almost_equal(val, y.numpy()) + + procs = [] + for rank in range(world_size): + p = mp.Process(target=worker, args=(rank,)) + p.start() + procs.append(p) + + for p in procs: + p.join(10) + assert p.exitcode == 0 diff --git a/imperative/python/test/unit/functional/test_elemwise.py b/imperative/python/test/unit/functional/test_elemwise.py new file mode 100644 index 0000000000000000000000000000000000000000..75d6874dbb6a74617716701d7543af4cdda57b44 --- /dev/null +++ b/imperative/python/test/unit/functional/test_elemwise.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+import numpy as np + +import megengine.functional as F +from megengine import tensor +from megengine.test import assertTensorClose + + +def test_abs(): + assertTensorClose( + F.abs(tensor([-3.0, -4.0, -5.0])).numpy(), + np.abs(np.array([-3.0, -4.0, -5.0], dtype=np.float32)), + ) + + # assertTensorClose(F.abs(-3.0), np.abs(np.float32(-3.0))) + + +def test_multiply(): + # assertTensorClose( + # F.mul(-3.0, -4.0), np.multiply(np.float32(-3.0), np.float32(-4.0)) + # ) + + assertTensorClose( + F.mul(tensor([3.0, 4.0]), 4.0).numpy(), + np.multiply(np.array([3.0, 4.0], dtype=np.float32), 4.0), + ) + + assertTensorClose( + F.mul(4.0, tensor([3.0, 4.0])).numpy(), + np.multiply(4.0, np.array([3.0, 4.0], dtype=np.float32)), + ) + + assertTensorClose( + F.mul(tensor([3.0, 4.0]), tensor([3.0, 4.0])).numpy(), + np.multiply( + np.array([3.0, 4.0], dtype=np.float32), + np.array([3.0, 4.0], dtype=np.float32), + ), + ) + + +def test_clamp(): + """Fix an issue when `lower` or `upper` is 0, it will be recognized as `False` and + `F.clamp` will fall into wrong conditions unexpectedly. + """ + x = np.linspace(-6, 6, dtype="float32") + assertTensorClose(F.clamp(tensor(x) + 3, 0, 6).numpy(), np.clip(x + 3, 0, 6)) + assertTensorClose(F.clamp(tensor(x) - 3, -6, 0).numpy(), np.clip(x - 3, -6, 0)) + + +# def test_isnan(): +# for case in [[1, float("nan"), 0]]: +# assertTensorClose(F.isnan(tensor(case)), np.isnan(case).astype("uint8")) + + +def test_isinf(): + for case in [[1, float("inf"), 0]]: + assertTensorClose(F.isinf(tensor(case)).numpy(), np.isinf(case).astype("uint8")) + + +def test_cosh(): + np.random.seed(42) + x = np.random.randn(100).astype("float32") + y_np = np.cosh(x) + y_mge = F.cosh(tensor(x)).numpy() + np.testing.assert_allclose(y_np, y_mge, rtol=1e-5) + + +def test_sinh(): + np.random.seed(42) + x = np.random.randn(100).astype("float32") + y_np = np.sinh(x) + y_mge = F.sinh(tensor(x)).numpy() + np.testing.assert_allclose(y_np, y_mge, rtol=1e-5) + + +def test_asinh(): + np.random.seed(42) + x = np.random.randn(100).astype("float32") + y_np = np.arcsinh(x) + y_mge = F.asinh(tensor(x)).numpy() + np.testing.assert_almost_equal(y_np, y_mge, decimal=5) + + +def test_acosh(): + x = np.arange(0, 10000).astype("float32") / 100 + 1 + y_np = np.arccosh(x) + y_mge = F.acosh(tensor(x)).numpy() + np.testing.assert_almost_equal(y_np, y_mge, decimal=6) + + +def test_atanh(): + np.random.seed(42) + x = np.random.rand(100).astype("float32") * 2 - 1 + y_np = np.arctanh(x) + y_mge = F.atanh(tensor(x)).numpy() + np.testing.assert_almost_equal(y_np, y_mge, decimal=5) + + +def test_fast_tanh(): + np.random.seed(42) + x = np.random.randn(100).astype("float32") + y_np = x * (27.0 + x * x) / (27.0 + 9.0 * x * x) + y_mge = F.fast_tanh(tensor(x)).numpy() + np.testing.assert_almost_equal(y_np, y_mge, decimal=6) + + +def test_hswish(): + np.random.seed(42) + x = np.random.randn(100).astype("float32") + y_np = x * np.minimum(np.maximum(x + 3, 0), 6) / 6 + y_mge = F.hswish(tensor(x)).numpy() + np.testing.assert_almost_equal(y_np, y_mge, decimal=6) + + +def test_hsigmoid(): + np.random.seed(42) + x = np.random.randn(100).astype("float32") + y_np = np.minimum(np.maximum(x + 3, 0), 6) / 6 + y_mge = F.hsigmoid(tensor(x)).numpy() + np.testing.assert_equal(y_np, y_mge) + + +def test_logical_oprs(): + x = np.array([[True, False], [False, True]]) + y = np.array([[True, True], [False, False]]) + xx = tensor(x) + yy = tensor(y) + np.testing.assert_equal(~x, (F.logical_not(xx)).numpy()) + np.testing.assert_equal(x & y, F.logical_and(xx, 
yy).numpy()) + np.testing.assert_equal(x | y, F.logical_or(xx, yy).numpy()) + np.testing.assert_equal(x ^ y, F.logical_xor(xx, yy).numpy()) diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py new file mode 100644 index 0000000000000000000000000000000000000000..beaff64845f59fb48e8f9ffd1de54de272e21b68 --- /dev/null +++ b/imperative/python/test/unit/functional/test_functional.py @@ -0,0 +1,623 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import itertools + +import numpy as np +import pytest + +import megengine.core.tensor.dtype as dtype +import megengine.functional as F +from megengine import Buffer, Parameter, is_cuda_available, tensor +from megengine.core.autodiff.grad import Grad +from megengine.test import assertTensorClose + + +def _default_compare_fn(x, y): + assertTensorClose(x.numpy(), y) + + +def opr_test(cases, func, compare_fn=_default_compare_fn, ref_fn=None, **kwargs): + """ + func: the function to run opr. + compare_fn: the function to compare the result and expected, use assertTensorClose if None. + ref_fn: the function to generate expected data, should assign output if None. + cases: the list which have dict element, the list length should be 2 for dynamic shape test. + and the dict should have input, + and should have output if ref_fn is None. + should use list for multiple inputs and outputs for each case. + kwargs: The additional kwargs for opr func. 
+ + simple examples: + + dtype = np.float32 + cases = [{"input": [10, 20]}, {"input": [20, 30]}] + opr_test(cases, + F.eye, + ref_fn=lambda n, m: np.eye(n, m).astype(dtype), + dtype=dtype) + + """ + + def check_results(results, expected): + if not isinstance(results, (tuple, list)): + results = (results,) + for r, e in zip(results, expected): + compare_fn(r, e) + + def get_param(cases, idx): + case = cases[idx] + inp = case.get("input", None) + outp = case.get("output", None) + if inp is None: + raise ValueError("the test case should have input") + if not isinstance(inp, (tuple, list)): + inp = (inp,) + if ref_fn is not None and callable(ref_fn): + outp = ref_fn(*inp) + if outp is None: + raise ValueError("the test case should have output or reference function") + if not isinstance(outp, (tuple, list)): + outp = (outp,) + + return inp, outp + + if len(cases) == 0: + raise ValueError("should give one case at least") + + if not callable(func): + raise ValueError("the input func should be callable") + + inp, outp = get_param(cases, 0) + inp_tensor = [tensor(inpi) for inpi in inp] + + results = func(*inp_tensor, **kwargs) + check_results(results, outp) + + +def test_flatten(): + data0_shape = (2, 3, 4, 5) + data1_shape = (4, 5, 6, 7) + data0 = np.random.random(data0_shape).astype(np.float32) + data1 = np.random.random(data1_shape).astype(np.float32) + + def compare_fn(x, y): + assert x.numpy().shape == y + + output0 = (2 * 3 * 4 * 5,) + output1 = (4 * 5 * 6 * 7,) + cases = [ + {"input": data0, "output": (output0,)}, + {"input": data1, "output": (output1,)}, + ] + opr_test(cases, F.flatten, compare_fn=compare_fn) + + output0 = (2, 3 * 4 * 5) + output1 = (4, 5 * 6 * 7) + cases = [ + {"input": data0, "output": (output0,)}, + {"input": data1, "output": (output1,)}, + ] + opr_test(cases, F.flatten, compare_fn=compare_fn, start_axis=1) + + output0 = (2, 3, 4 * 5) + output1 = (4, 5, 6 * 7) + cases = [ + {"input": data0, "output": (output0,)}, + {"input": data1, "output": (output1,)}, + ] + opr_test(cases, F.flatten, compare_fn=compare_fn, start_axis=2) + + output0 = (2, 3 * 4, 5) + output1 = (4, 5 * 6, 7) + cases = [ + {"input": data0, "output": (output0,)}, + {"input": data1, "output": (output1,)}, + ] + opr_test(cases, F.flatten, compare_fn=compare_fn, start_axis=1, end_axis=2) + + +# def test_where(): +# maskv0 = np.array([[1, 0], [0, 1]], dtype=np.int32) +# xv0 = np.array([[1, np.inf], [np.nan, 4]], dtype=np.float32) +# yv0 = np.array([[5, 6], [7, 8]], dtype=np.float32) + +# maskv1 = np.array([[1, 0, 1], [1, 0, 0], [1, 1, 0]], dtype=np.int32) +# xv1 = np.array([[1, np.inf, 2], [0, np.nan, 4], [1, 5, 7]], dtype=np.float32) +# yv1 = np.array([[5, 6, 9], [2, 7, 8], [2, 1, 9]], dtype=np.float32) + +# cases = [ +# {"input": [maskv0, xv0, yv0]}, +# {"input": [maskv1, xv1, yv1]}, +# ] +# opr_test(cases, F.where, ref_fn=np.where) + +# maskv2 = np.array([1, 1, 1], dtype=np.int32) +# xv2 = np.array([1, 3, 2], dtype=np.float32) +# yv2 = np.array([5, 6, 9], dtype=np.float32) + +# maskv3 = np.array([0, 0, 0], dtype=np.int32) +# xv3 = np.array([1, 3, 2], dtype=np.float32) +# yv3 = np.array([5, 6, 9], dtype=np.float32) + +# cases = [ +# {"input": [maskv2, xv2, yv2]}, +# {"input": [maskv3, xv3, yv3]}, +# ] +# opr_test(cases, F.where, ref_fn=np.where) + + +def test_matmul(): + shape1 = 3 + shape2 = 3 + shape3 = (3, 5) + shape4 = (5, 6) + data1 = np.random.random(shape1).astype("float32") + data2 = np.random.random(shape2).astype("float32") + data3 = np.random.random(shape3).astype("float32") + data4 = 
np.random.random(shape4).astype("float32") + + cases = [ + {"input": [data1, data2]}, + {"input": [data2, data3]}, + {"input": [data3, data4]}, + ] + opr_test(cases, F.matmul, ref_fn=np.matmul) + + batch_size = 10 + shape1 = (batch_size, 2, 3) + shape2 = (batch_size, 3, 4) + shape3 = (batch_size, 10, 4, 5) + data1 = np.random.random(shape1).astype("float32") + data2 = np.random.random(shape2).astype("float32") + data3 = np.random.random(shape3).astype("float32") + + cases = [{"input": [data1, data2]}, {"input": [data2, data3]}] + for i in range(0, batch_size): + + def compare_fn(x, y): + x.numpy()[i, ...] == y + + opr_test( + cases, + F.matmul, + compare_fn=compare_fn, + ref_fn=lambda x, y: np.matmul(x[i, ...], y[i, ...]), + ) + + +def test_interpolate(): + def linear_interpolate(): + inp = tensor(np.arange(1, 3, dtype=np.float32).reshape(1, 1, 2)) + + out = F.interpolate(inp, scale_factor=2.0, mode="LINEAR") + out2 = F.interpolate(inp, 4, mode="LINEAR") + + assertTensorClose( + out.numpy(), np.array([[[1.0, 1.25, 1.75, 2.0]]], dtype=np.float32) + ) + assertTensorClose( + out2.numpy(), np.array([[[1.0, 1.25, 1.75, 2.0]]], dtype=np.float32) + ) + + def many_batch_interpolate(): + inp = tensor(np.arange(1, 9, dtype=np.float32).reshape(2, 1, 2, 2)) + + out = F.interpolate(inp, [4, 4]) + out2 = F.interpolate(inp, scale_factor=2.0) + + assertTensorClose(out.numpy(), out2.numpy()) + + def assign_corner_interpolate(): + inp = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) + + out = F.interpolate(inp, [4, 4], align_corners=True) + out2 = F.interpolate(inp, scale_factor=2.0, align_corners=True) + + assertTensorClose(out.numpy(), out2.numpy()) + + def error_shape_linear_interpolate(): + inp = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) + + with pytest.raises(ValueError): + F.interpolate(inp, scale_factor=2.0, mode="LINEAR") + + def inappropriate_scale_linear_interpolate(): + inp = tensor(np.arange(1, 3, dtype=np.float32).reshape(1, 1, 2)) + + with pytest.raises(ValueError): + F.interpolate(inp, scale_factor=[2.0, 3.0], mode="LINEAR") + + linear_interpolate() + many_batch_interpolate() + assign_corner_interpolate() + error_shape_linear_interpolate() + inappropriate_scale_linear_interpolate() + + +def _save_to(self, name="grad"): + def callback(tensor, grad): + setattr(self, name, grad) + + return callback + + +def _gen_roi_inp(): + inp_feat = np.random.randn(2, 32, 256, 256) + rois = np.zeros((4, 5)) + rois[:, 0] = [0, 0, 1, 1] + rois[:, 1:3] = np.random.rand(4, 2) * 100 + rois[:, 3:] = np.random.rand(4, 2) * 100 + 150 + + inp_feat = tensor(inp_feat) + rois = tensor(rois) + return inp_feat, rois + + +def test_roi_align(): + inp_feat, rois = _gen_roi_inp() + grad = Grad().wrt(inp_feat, callback=_save_to(inp_feat)) + + output_shape = (7, 7) + out_feat = F.roi_align( + inp_feat, + rois, + output_shape=output_shape, + mode="average", + spatial_scale=1.0 / 4, + sample_points=2, + aligned=True, + ) + assert out_feat.shape == (rois.shape[0], inp_feat.shape[1], *output_shape) + + grad(out_feat, tensor(F.ones_like(out_feat))) + assert inp_feat.grad.shape == inp_feat.shape + + +def test_roi_pooling(): + inp_feat, rois = _gen_roi_inp() + grad = Grad().wrt(inp_feat, callback=_save_to(inp_feat)) + output_shape = (7, 7) + out_feat = F.roi_pooling( + inp_feat, rois, output_shape=output_shape, mode="max", scale=1.0 / 4, + ) + assert out_feat.shape == (rois.shape[0], inp_feat.shape[1], *output_shape) + + grad(out_feat, tensor(F.ones_like(out_feat))) + assert inp_feat.grad.shape == 
inp_feat.shape + + +# def test_one_hot(): +# def onehot_low_dimension(): +# inp = tensor(np.arange(1, 4, dtype=np.int32)) +# out = F.one_hot(inp, num_classes=4) + +# assertTensorClose( +# out.numpy(), np.eye(4, dtype=np.int32)[np.arange(1, 4, dtype=np.int32)] +# ) + + +# def onehot_high_dimension(): +# arr = np.array( +# [[3, 2, 4, 4, 2, 4, 0, 4, 4, 1], [4, 1, 1, 3, 2, 2, 4, 2, 4, 3]], dtype=np.int32 +# ) + +# inp = tensor(arr) +# out = F.one_hot(inp, 10) + +# assertTensorClose(out.numpy(), np.eye(10, dtype=np.int32)[arr]) + +# onehot_low_dimension() +# onehot_high_dimension() + + +def test_add_update(): + shape = (2, 3) + v = np.random.random(shape).astype(np.float32) + b = Buffer(v) + + u = F.add_update(b, 1) + assertTensorClose(u.numpy(), v + 1) + u = F.add_update(b, 1) + assertTensorClose(u.numpy(), v + 2) + + x = np.ones((2, 2), dtype=np.float32) + y = x * 0.5 + dest = tensor(x) + delta = tensor(y) + r = F.add_update(dest, delta, alpha=0.9, beta=0.1, bias=0.1) + assertTensorClose(r.numpy(), x * 0.9 + y * 0.1 + 0.1) + + +def test_add_update_params(): + b = np.random.random((2, 3)).astype(np.float32) + y = Buffer(b) + + # @jit.trace + def f(x): + return F.add_update(y, x) + + f(np.zeros((2, 3)).astype(np.float32)) + + z = Buffer(np.zeros((2, 3)).astype(np.float32)) + F.add_update(y, z, beta=0.1) + + res = f(np.ones((2, 3)).astype(np.float32)) + assertTensorClose(res.numpy(), b + 1) + + +# def test_cross_entropy_with_softmax(): +# data1_shape = (1, 2) +# label1_shape = (1,) +# data2_shape = (1, 3) +# label2_shape = (1,) + +# data1 = np.array([1, 0.5], dtype=np.float32).reshape(data1_shape) +# label1 = np.array([1], dtype=np.int32).reshape(label1_shape) +# expect1 = F.cross_entropy(F.softmax(tensor(data1)), tensor(label1)).numpy() + +# data2 = np.array([0.3, 0.4, 0.3], dtype=np.float32).reshape(data2_shape) +# label2 = np.array([1], dtype=np.int32).reshape(label2_shape) +# expect2 = F.cross_entropy(F.softmax(tensor(data2)), tensor(label2)).numpy() + +# cases = [ +# {"input": [data1, label1], "output": expect1,}, +# {"input": [data2, label2], "output": expect2,}, +# ] +# opr_test(cases, F.cross_entropy_with_softmax) + + +# def test_cross_entropy(): +# data1_shape = (1, 2) +# label1_shape = (1,) +# data2_shape = (1, 3) +# label2_shape = (1,) + +# data1 = np.array([0.5, 0.5], dtype=np.float32).reshape(data1_shape) +# label1 = np.array([1], dtype=np.int32).reshape(label1_shape) +# expect1 = np.array([-np.log(0.5)], dtype=np.float32) + +# data2 = np.array([0.3, 0.4, 0.3], dtype=np.float32).reshape(data2_shape) +# label2 = np.array([1], dtype=np.int32).reshape(label2_shape) +# expect2 = np.array([-np.log(0.4)], dtype=np.float32) + +# cases = [ +# {"input": [data1, label1], "output": expect1,}, +# {"input": [data2, label2], "output": expect2,}, +# ] +# opr_test(cases, F.cross_entropy) + + +def test_binary_cross_entropy(): + data1_shape = (2, 2) + label1_shape = (2, 2) + data2_shape = (2, 3) + label2_shape = (2, 3) + + def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + def compare_fn(x, y): + assertTensorClose(x.numpy(), y, max_err=5e-4) + + np.random.seed(123) + data1 = sigmoid(np.random.uniform(size=data1_shape).astype(np.float32)) + label1 = np.random.uniform(size=label1_shape).astype(np.float32) + expect1 = np.array([0.6361], dtype=np.float32) + + np.random.seed(123) + data2 = sigmoid(np.random.uniform(size=data2_shape).astype(np.float32)) + label2 = np.random.uniform(size=label2_shape).astype(np.float32) + expect2 = np.array([0.6750], dtype=np.float32) + + cases = [ + {"input": [data1, 
label1], "output": expect1,}, + {"input": [data2, label2], "output": expect2,}, + ] + opr_test(cases, F.binary_cross_entropy, compare_fn=compare_fn) + + +def test_hinge_loss(): + np.random.seed(123) + # case with L1 norm + cases = [] + for shape in [(2, 2), (2, 3)]: + data = np.random.uniform(size=shape).astype(np.float32) + label = 2 * np.random.randint(0, 1, size=shape).astype(np.float32) - 1 + expect = np.clip(0, np.inf, 1 - data * label).sum(axis=1).mean() + cases.append({"input": [data, label], "output": expect}) + + opr_test(cases, F.hinge_loss) + + # cases with L2 norm + cases = [] + for shape in [(2, 2), (2, 3)]: + data = np.random.uniform(size=shape).astype(np.float32) + label = 2 * np.random.randint(0, 1, size=shape).astype(np.float32) - 1 + expect = ((np.clip(0, np.inf, 1 - data * label) ** 2).sum(axis=1)).mean() + cases.append({"input": [data, label], "output": expect}) + + def hinge_loss_with_l2_norm(pred, label): + return F.hinge_loss(pred, label, "L2") + + opr_test(cases, hinge_loss_with_l2_norm) + + +def test_nms(): + x = np.array( + [ + [0, 0, 100, 100], + [10, 10, 100, 100], + [50, 50, 100, 100], + [100, 100, 150, 150], + ], + dtype=np.float32, + ) + inp = tensor(x) + scores = tensor([0.5, 0.8, 0.9, 0.6], dtype=np.float32) + result = F.nms(inp, iou_thresh=0.5, scores=scores) + np.testing.assert_equal(result.numpy(), np.array([2, 1, 3], dtype=np.int32)) + + +def test_batched_nms(): + x = np.array( + [ + [0, 0, 100, 100], + [0.5, 0.5, 1.5, 1.5], + [20, 20, 100, 100], + [0.5, 0.5, 1.0, 1.0], + [10, 10, 100, 100], + [0.5, 0.5, 1.0, 1.0], + ], + dtype=np.float32, + ) + inp = tensor(x) + scores = tensor([0.6, 0.9, 0.5, 0.6, 0.8, 0.7], dtype=np.float32) + idxs = tensor([0, 1, 0, 1, 0, 1], dtype=np.int32) + results = F.batched_nms(inp, iou_thresh=0.5, idxs=idxs, scores=scores) + np.testing.assert_equal(results.numpy(), np.array([1, 4, 5], dtype=np.int32)) + + +# def test_smooth_l1_loss(): +# np.random.seed(123) +# cases = [] +# for shape in [(2, 2), (2, 3)]: +# data = np.random.uniform(size=shape).astype(np.float32) +# label = np.random.uniform(size=shape).astype(np.float32) +# diff = np.abs(data - label) +# expect = np.where(diff < 1, 0.5 * diff ** 2, diff - 0.5).mean() +# cases.append({"input": [data, label], "output": tensor(expect)}) + +# opr_test(cases, F.smooth_l1_loss) + + +def test_conv_bias(): + inp_scale = 1.5 + w_scale = 2.5 + outp_scale = 1.5 + inp_dtype = dtype.qint8(inp_scale) + w_dtype = dtype.qint8(w_scale) + b_dtype = dtype.qint32(inp_scale * w_scale) + out_dtype = dtype.qint8(outp_scale) + + def run( + N, + IC, + OC, + IH, + IW, + KH, + KW, + PH, + PW, + SH, + SW, + has_bias=True, + nonlinear_mode="IDENTITY", + ): + inp_v = np.random.normal(size=(N, IC, IH, IW)) + w_v = np.random.normal(size=(OC, IC, KW, KW)) + b_v = np.random.normal(size=(1, OC, 1, 1)) + inp_scale = dtype.get_scale(inp_dtype) + w_scale = dtype.get_scale(w_dtype) + b_scale = dtype.get_scale(b_dtype) + + inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype) + wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype) + bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype) + + inp_int8 = tensor(inpv, dtype=inp_dtype) + w_int8 = Parameter(wv, dtype=w_dtype) + b_int32 = Parameter(bv, dtype=b_dtype) + + inp_fp32 = inp_int8.astype("float32") + w_fp32 = w_int8.astype("float32") + b_fp32 = b_int32.astype("float32") + + def convert_to_nchw4(var): + var = F.reshape( + var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3]) + ) + var = F.dimshuffle(var, (0, 1, 3, 4, 2)) + return var + + def 
run_conv2d(inp, w, b): + O = F.conv2d( + inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW), + ) + if nonlinear_mode == "RELU": + return F.relu(O) + else: + return O + + def run_conv_bias(inp, w, b, format="NCHW"): + b = b if has_bias else Parameter(np.zeros_like(b.numpy())) + if format == "NCHW4": + inp = convert_to_nchw4(inp) + w = convert_to_nchw4(w) + b = convert_to_nchw4(b) + return F.conv_bias_activation( + inp, + w, + b, + stride=(SH, SW), + padding=(PH, PW), + format=format, + dtype=out_dtype, + nonlinear_mode=nonlinear_mode, + ) + + format = "NCHW4" if is_cuda_available() else "NCHW" + + expected = run_conv2d(inp_fp32, w_fp32, b_fp32) + expected = expected.astype(out_dtype).astype("float32") + result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype( + "float32" + ) + if format == "NCHW4": + result = F.dimshuffle(result, (0, 1, 4, 2, 3)) + expected = F.flatten(expected) + result = F.flatten(result) + assertTensorClose(result.numpy(), expected.numpy(), max_err=outp_scale) + + run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1, False) + run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1, False) + run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False) + + run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1) + run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1) + run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2) + + run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "RELU") + run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "RELU") + + +# def test_softplus(): +# x = np.arange(1000).astype(np.float32) +# out = F.softplus(tensor(x)) +# mask = x <= 20 +# with np.errstate(over="ignore"): +# expected = np.where(mask, np.log(1 + np.exp(x)), x) +# assertTensorClose(out, expected) +# beta = 2 +# out = F.softplus(tensor(x), beta=beta, threshold=30) +# mask = beta * x <= 30 +# # ignore overflow +# with np.errstate(over="ignore"): +# expected = np.where(mask, np.log(1 + np.exp(x * beta)) / beta, x) +# assertTensorClose(out, expected) + + +def test_condtake(): + x = np.array([[1, 2, 3], [4, 5, 6]]) + y = np.array([[True, False, True], [False, True, True]]) + xx = tensor(x) + yy = tensor(y) + val, idx = F.cond_take(yy, xx) + np.testing.assert_equal(val.numpy(), x[y]) + np.testing.assert_equal(idx.numpy(), np.where(y.reshape(-1))[0]) diff --git a/imperative/python/test/unit/functional/test_math.py b/imperative/python/test/unit/functional/test_math.py new file mode 100644 index 0000000000000000000000000000000000000000..d693f36b9d1e508bba8328c9ef7f51890ff9272e --- /dev/null +++ b/imperative/python/test/unit/functional/test_math.py @@ -0,0 +1,258 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from functools import partial + +import numpy as np + +import megengine.functional as F +from megengine import tensor +from megengine.test import assertTensorClose + +# from helpers import opr_test + + +def _default_compare_fn(x, y): + assertTensorClose(x.numpy(), y) + + +def opr_test(cases, func, compare_fn=_default_compare_fn, ref_fn=None, **kwargs): + """ + func: the function to run opr. + compare_fn: the function to compare the result and expected, use assertTensorClose if None. + ref_fn: the function to generate expected data, should assign output if None. 
+ cases: the list which have dict element, the list length should be 2 for dynamic shape test. + and the dict should have input, + and should have output if ref_fn is None. + should use list for multiple inputs and outputs for each case. + kwargs: The additional kwargs for opr func. + + simple examples: + + dtype = np.float32 + cases = [{"input": [10, 20]}, {"input": [20, 30]}] + opr_test(cases, + F.eye, + ref_fn=lambda n, m: np.eye(n, m).astype(dtype), + dtype=dtype) + + """ + + def check_results(results, expected): + if not isinstance(results, tuple): + results = (results,) + for r, e in zip(results, expected): + compare_fn(r, e) + + def get_param(cases, idx): + case = cases[idx] + inp = case.get("input", None) + outp = case.get("output", None) + if inp is None: + raise ValueError("the test case should have input") + if not isinstance(inp, list): + inp = (inp,) + else: + inp = tuple(inp) + if ref_fn is not None and callable(ref_fn): + outp = ref_fn(*inp) + if outp is None: + raise ValueError("the test case should have output or reference function") + if not isinstance(outp, list): + outp = (outp,) + else: + outp = tuple(outp) + + return inp, outp + + if len(cases) == 0: + raise ValueError("should give one case at least") + + if not callable(func): + raise ValueError("the input func should be callable") + + inp, outp = get_param(cases, 0) + inp_tensor = [tensor(inpi) for inpi in inp] + + results = func(*inp_tensor, **kwargs) + check_results(results, outp) + + +def common_test_reduce(opr, ref_opr): + data1_shape = (5, 6, 7) + data2_shape = (2, 9, 12) + data1 = np.random.random(data1_shape).astype(np.float32) + data2 = np.random.random(data2_shape).astype(np.float32) + cases = [{"input": data1}, {"input": data2}] + + if opr not in (F.argmin, F.argmax): + # test default axis + opr_test(cases, opr, ref_fn=ref_opr) + # test all axises in range of input shape + for axis in range(-3, 3): + # test keepdims False + opr_test(cases, opr, ref_fn=lambda x: ref_opr(x, axis=axis), axis=axis) + # test keepdims True + opr_test( + cases, + opr, + ref_fn=lambda x: ref_opr(x, axis=axis, keepdims=True), + axis=axis, + keepdims=True, + ) + else: + # test defaut axis + opr_test(cases, opr, ref_fn=lambda x: ref_opr(x).astype(np.int32)) + # test all axises in range of input shape + for axis in range(0, 3): + opr_test( + cases, + opr, + ref_fn=lambda x: ref_opr(x, axis=axis).astype(np.int32), + axis=axis, + ) + + +def test_sum(): + common_test_reduce(opr=F.sum, ref_opr=np.sum) + + +def test_prod(): + common_test_reduce(opr=F.prod, ref_opr=np.prod) + + +def test_mean(): + common_test_reduce(opr=F.mean, ref_opr=np.mean) + + +def test_var(): + common_test_reduce(opr=F.var, ref_opr=np.var) + + +def test_std(): + common_test_reduce(opr=F.std, ref_opr=np.std) + + +def test_min(): + common_test_reduce(opr=F.min, ref_opr=np.min) + + +def test_max(): + common_test_reduce(opr=F.max, ref_opr=np.max) + + +def test_argmin(): + common_test_reduce(opr=F.argmin, ref_opr=np.argmin) + + +def test_argmax(): + common_test_reduce(opr=F.argmax, ref_opr=np.argmax) + + +def test_sqrt(): + d1_shape = (15,) + d2_shape = (25,) + d1 = np.random.random(d1_shape).astype(np.float32) + d2 = np.random.random(d2_shape).astype(np.float32) + + cases = [{"input": d1}, {"input": d2}] + opr_test(cases, F.sqrt, ref_fn=np.sqrt) + + +def test_sort(): + data1_shape = (10, 3) + data2_shape = (12, 2) + data1 = np.random.random(data1_shape).astype(np.float32) + data2 = np.random.random(data2_shape).astype(np.float32) + output0 = [np.sort(data1), 
np.argsort(data1).astype(np.int32)] + output1 = [np.sort(data2), np.argsort(data2).astype(np.int32)] + + cases = [ + {"input": data1, "output": output0}, + {"input": data2, "output": output1}, + ] + opr_test(cases, F.sort) + + +def test_normalize(): + + cases = [ + {"input": np.random.random((2, 3, 12, 12)).astype(np.float32)} for i in range(2) + ] + + def np_normalize(x, p=2, axis=None, eps=1e-12): + if axis is None: + norm = np.sum(x ** p) ** (1.0 / p) + else: + norm = np.sum(x ** p, axis=axis, keepdims=True) ** (1.0 / p) + return x / np.clip(norm, a_min=eps, a_max=np.inf) + + # Test L-2 norm along all dimensions + opr_test(cases, F.normalize, ref_fn=np_normalize) + + # Test L-1 norm along all dimensions + opr_test(cases, partial(F.normalize, p=1), ref_fn=partial(np_normalize, p=1)) + + # Test L-2 norm along the second dimension + opr_test(cases, partial(F.normalize, axis=1), ref_fn=partial(np_normalize, axis=1)) + + # Test some norm == 0 + cases[0]["input"][0, 0, 0, :] = 0 + cases[1]["input"][0, 0, 0, :] = 0 + opr_test(cases, partial(F.normalize, axis=3), ref_fn=partial(np_normalize, axis=3)) + + +# def test_logsumexp(): +# x = np.arange(10).astype(np.float32) +# expected = np.log(np.sum(np.exp(x))) +# cases = [{"input": x, "output": expected}] +# compare_fn = partial(assertTensorClose, allow_special_values=True) +# # large value check +# n = 100 +# x = np.full(n, 10000, dtype=np.float32) +# expected = 10000 + np.log(n) +# cases.append({"input": x, "output": expected.astype(np.float32)}) +# opr_test(cases, F.logsumexp, axis=0, compare_fn=compare_fn) + +# # special value check +# x = np.array([np.inf], dtype=np.float32) +# expected = x +# cases = [{"input": x, "output": expected}] + +# x = np.array([-np.inf, 0.0], dtype=np.float32) +# expected = np.zeros(1).astype(np.float32) +# cases.append({"input": x, "output": expected}) +# opr_test(cases, F.logsumexp, axis=0, compare_fn=compare_fn) + +# x = np.array([np.nan], dtype=np.float32) +# expected = x +# cases = [{"input": x, "output": expected}] + +# x = np.array([-np.inf, 1], dtype=np.float32) +# expected = np.array([1.0], dtype=np.float32) +# cases.append({"input": x, "output": expected}) + +# opr_test(cases, F.logsumexp, axis=0, compare_fn=compare_fn) + +# # keepdims check +# x = np.array([[1e10, 1e-10], [-1e10, -np.inf]], dtype=np.float32) +# expected = np.array([[1e10], [-1e10]], dtype=np.float32) +# cases = [{"input": x, "output": expected}] +# x = np.array([[1e10, -1e-10, 1e-10], [1e10, 1e-10, np.inf]], dtype=np.float32) +# expected = np.array([[1e10], [np.inf]], dtype=np.float32) +# cases.append({"input": x, "output": expected}) +# opr_test(cases, F.logsumexp, axis=1, keepdims=True, compare_fn=compare_fn) + +# # multiple axes check +# x = np.array([[1e10, 1e-10], [-1e10, -np.inf]], dtype=np.float32) +# expected = np.array([1e10], dtype=np.float32) +# cases = [{"input": x, "output": expected}] +# x = np.array([[1e10, -1e-10, 1e-10], [1e10, 1e-10, np.inf]], dtype=np.float32) +# expected = np.array([np.inf], dtype=np.float32) +# cases.append({"input": x, "output": expected}) +# opr_test(cases, F.logsumexp, axis=(0, 1), keepdims=False, compare_fn=compare_fn) diff --git a/imperative/python/test/unit/functional/test_tensor.py b/imperative/python/test/unit/functional/test_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..018871a20706782d37001c94d39053ad45293611 --- /dev/null +++ b/imperative/python/test/unit/functional/test_tensor.py @@ -0,0 +1,313 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the 
Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import numpy as np
+import pytest
+
+import megengine.functional as F
+from megengine import Buffer, Parameter, is_cuda_available, tensor
+from megengine.core.tensor.utils import astensor1d
+from megengine.test import assertTensorClose
+
+
+def _default_compare_fn(x, y):
+    assertTensorClose(x.numpy(), y)
+
+
+def opr_test(cases, func, compare_fn=_default_compare_fn, ref_fn=None, **kwargs):
+    """
+    func: the function to run opr.
+    compare_fn: the function to compare the result and expected; defaults to assertTensorClose.
+    ref_fn: the function to generate expected data; each case must carry "output" if it is None.
+    cases: a list of dicts; the list should hold two cases for the dynamic shape test.
+           Each dict must have "input",
+           and must also have "output" if ref_fn is None.
+           Use lists for multiple inputs and outputs in a single case.
+    kwargs: the additional kwargs for the opr func.
+
+    simple examples:
+
+        dtype = np.float32
+        cases = [{"input": [10, 20]}, {"input": [20, 30]}]
+        opr_test(cases,
+                 F.eye,
+                 ref_fn=lambda n, m: np.eye(n, m).astype(dtype),
+                 dtype=dtype)
+
+    """
+
+    def check_results(results, expected):
+        if not isinstance(results, tuple):
+            results = (results,)
+        for r, e in zip(results, expected):
+            compare_fn(r, e)
+
+    def get_param(cases, idx):
+        case = cases[idx]
+        inp = case.get("input", None)
+        outp = case.get("output", None)
+        if inp is None:
+            raise ValueError("the test case should have input")
+        if not isinstance(inp, list):
+            inp = (inp,)
+        else:
+            inp = tuple(inp)
+        if ref_fn is not None and callable(ref_fn):
+            outp = ref_fn(*inp)
+        if outp is None:
+            raise ValueError("the test case should have output or reference function")
+        if not isinstance(outp, list):
+            outp = (outp,)
+        else:
+            outp = tuple(outp)
+
+        return inp, outp
+
+    if len(cases) == 0:
+        raise ValueError("should give at least one case")
+
+    if not callable(func):
+        raise ValueError("the input func should be callable")
+
+    inp, outp = get_param(cases, 0)
+    inp_tensor = [tensor(inpi) for inpi in inp]
+
+    results = func(*inp_tensor, **kwargs)
+    check_results(results, outp)
+
+
+def test_eye():
+    dtype = np.float32
+    cases = [{"input": [10, 20]}, {"input": [20, 30]}]
+    for case in cases:
+        assertTensorClose(
+            F.eye(case["input"], dtype=dtype).numpy(),
+            np.eye(*case["input"]).astype(dtype),
+        )
+
+
+def test_concat():
+    def get_data_shape(length: int):
+        return (length, 2, 3)
+
+    data1 = np.random.random(get_data_shape(5)).astype("float32")
+    data2 = np.random.random(get_data_shape(6)).astype("float32")
+    data3 = np.random.random(get_data_shape(7)).astype("float32")
+
+    def run(data1, data2):
+        return F.concat([data1, data2])
+
+    cases = [{"input": [data1, data2]}, {"input": [data1, data3]}]
+    opr_test(cases, run, ref_fn=lambda x, y: np.concatenate([x, y]))
+
+
+def test_stack():
+    data1 = np.random.random((3, 2, 2)).astype("float32")
+    data2 = np.random.random((3, 2, 2)).astype("float32")
+    data3 = np.random.random((3, 2, 2)).astype("float32")
+
+    cases = [{"input": [data1, data2]}, {"input": [data1, data3]}]
+    for ai in range(3):
+
+        def run(data1, data2):
+            return F.stack([data1, data2], axis=ai)
+
+        opr_test(cases, run, ref_fn=lambda x, y: np.stack([x, y], axis=ai))
+
+
+def test_split():
+ data = np.random.random((2, 3, 4, 5)).astype(np.float32) + mge_out1 = F.split(tensor(data), 2, axis=3) + mge_out2 = F.split(tensor(data), [3, 5], axis=3) + + np_out = np.split(data, [3, 5], axis=3) + + np.testing.assert_equal(mge_out1[0].numpy(), mge_out2[0].numpy()) + np.testing.assert_equal(mge_out1[0].numpy(), np_out[0]) + + +def test_reshape(): + x = np.arange(6, dtype="float32") + xx = tensor(x) + y = x.reshape(1, 2, 3) + + for shape in [ + (1, 2, 3), + (1, -1, 3), + (1, tensor(-1), 3), + np.array([1, -1, 3], dtype="int32"), + tensor([1, -1, 3]), + ]: + yy = F.reshape(xx, shape) + np.testing.assert_equal(yy.numpy(), y) + + +def test_squeeze(): + x = np.arange(6, dtype="float32").reshape(1, 2, 3, 1) + xx = tensor(x) + + for axis in [None, 3, -4, (3, -4)]: + y = np.squeeze(x, axis) + yy = F.squeeze(xx, axis) + np.testing.assert_equal(y, yy.numpy()) + + +def test_expand_dims(): + x = np.arange(6, dtype="float32").reshape(2, 3) + xx = tensor(x) + + for axis in [2, -3, (3, -4), (1, -4)]: + y = np.expand_dims(x, axis) + yy = F.expand_dims(xx, axis) + np.testing.assert_equal(y, yy.numpy()) + + +def test_elemwise_dtype_promotion(): + x = np.random.rand(2, 3).astype("float32") + y = np.random.rand(1, 3).astype("float16") + xx = tensor(x) + yy = tensor(y) + z = xx * yy + np.testing.assert_equal(z.numpy(), x * y) + + z = xx + y + np.testing.assert_equal(z.numpy(), x + y) + + z = x - yy + np.testing.assert_equal(z.numpy(), x - y) + + +def test_linspace(): + cases = [ + {"input": [1, 9, 9]}, + {"input": [3, 10, 8]}, + ] + opr_test( + cases, + F.linspace, + ref_fn=lambda start, end, step: np.linspace(start, end, step, dtype=np.float32), + ) + + cases = [ + {"input": [9, 1, 9]}, + {"input": [10, 3, 8]}, + ] + opr_test( + cases, + F.linspace, + ref_fn=lambda start, end, step: np.linspace(start, end, step, dtype=np.float32), + ) + + +def test_arange(): + cases = [ + {"input": [1, 9, 1]}, + {"input": [2, 10, 2]}, + ] + opr_test( + cases, + F.arange, + ref_fn=lambda start, end, step: np.arange(start, end, step, dtype=np.float32), + ) + + cases = [ + {"input": [9, 1, -1]}, + {"input": [10, 2, -2]}, + ] + opr_test( + cases, + F.arange, + ref_fn=lambda start, end, step: np.arange(start, end, step, dtype=np.float32), + ) + + cases = [ + {"input": [9.3, 1.2, -0.5]}, + {"input": [10.3, 2.1, -1.7]}, + ] + opr_test( + cases, + F.arange, + ref_fn=lambda start, end, step: np.arange(start, end, step, dtype=np.float32), + ) + + +def test_round(): + data1_shape = (15,) + data2_shape = (25,) + data1 = np.random.random(data1_shape).astype(np.float32) + data2 = np.random.random(data2_shape).astype(np.float32) + + cases = [{"input": data1}, {"input": data2}] + opr_test(cases, F.round, ref_fn=np.round) + + +def test_broadcast(): + input1_shape = (20, 30) + output1_shape = (30, 20, 30) + data1 = np.random.random(input1_shape).astype(np.float32) + + input2_shape = (10, 20) + output2_shape = (20, 10, 20) + data2 = np.random.random(input2_shape).astype(np.float32) + + def compare_fn(x, y): + assert x.numpy().shape == y + + cases = [ + {"input": [data1, output1_shape], "output": output1_shape}, + {"input": [data2, output2_shape], "output": output2_shape}, + ] + opr_test(cases, F.broadcast, compare_fn=compare_fn) + + +def test_utils_astensor1d(): + reference = tensor(0) + + # literal + x = [1, 2, 3] + for dtype in [None, "float32"]: + xx = astensor1d(x, reference, dtype=dtype) + assert type(xx) is tensor + np.testing.assert_equal(xx.numpy(), x) + + # numpy array + x = np.asarray([1, 2, 3], dtype="int32") + for dtype in 
[None, "float32"]: + xx = astensor1d(x, reference, dtype=dtype) + assert type(xx) is tensor + np.testing.assert_equal(xx.numpy(), x.astype(dtype) if dtype else x) + + # tensor + x = tensor([1, 2, 3], dtype="int32") + for dtype in [None, "float32"]: + xx = astensor1d(x, reference, dtype=dtype) + assert type(xx) is tensor + np.testing.assert_equal(xx.numpy(), x.numpy()) + + # mixed + x = [1, tensor(2), 3] + for dtype in [None, "float32"]: + xx = astensor1d(x, reference, dtype=dtype) + assert type(xx) is tensor + np.testing.assert_equal(xx.numpy(), [1, 2, 3]) + + +def test_device(): + x = tensor([1, 2, 3], dtype="float32") + + y1 = F.eye(x.shape, dtype="float32") + y2 = F.eye(x.shape, dtype="float32", device=None) + np.testing.assert_almost_equal(y1.numpy(), y2.numpy()) + + y3 = F.eye(x.shape, dtype="float32", device="xpux") + y4 = F.eye(x.shape, dtype="float32", device=x.device.to_c()) + np.testing.assert_almost_equal(y3.numpy(), y4.numpy()) + + y5 = F.full((3, 2), 4, device=x.device) + y6 = F.full((3, 2), 4, device="xpux") + np.testing.assert_almost_equal(y5.numpy(), y6.numpy()) diff --git a/imperative/python/test/unit/quantization/quantize.py b/imperative/python/test/unit/quantization/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..236ef9e137e5c95da76855791c8759c450a75b67 --- /dev/null +++ b/imperative/python/test/unit/quantization/quantize.py @@ -0,0 +1,80 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from megengine import module as Float +from megengine.module import qat as QAT +from megengine.quantization.quantize import _get_quantable_module_names, quantize_qat + + +def test_get_quantable_module_names(): + # need to make sure names from Quantized and QAT are the same + def _get_qat_module_names(): + def is_qat(key: str): + value = getattr(QAT, key) + return ( + isinstance(value, type) + and issubclass(value, QAT.QATModule) + and value != QAT.QATModule + ) + + # source should have all quantable modules' names + quantable_module_names = [key for key in dir(QAT) if is_qat(key)] + return quantable_module_names + + qat_module_names = _get_qat_module_names() + quantized_module_names = _get_quantable_module_names() + assert set(qat_module_names) == set(quantized_module_names) + + for key in qat_module_names: + value = getattr(Float, key) + assert ( + isinstance(value, type) + and issubclass(value, Float.Module) + and value != Float.Module + ) + + +def test_disable_quantize(): + class Net(Float.Module): + def __init__(self): + super().__init__() + self.conv = Float.ConvBnRelu2d(3, 3, 3) + self.conv.disable_quantize() + + def forward(self, x): + return self.conv(x) + + net = Net() + qat_net = quantize_qat(net, inplace=False) + assert isinstance(qat_net.conv, Float.ConvBnRelu2d) + assert isinstance(qat_net.conv.conv, Float.Conv2d) + + +def test_convert_with_custom_mapping(): + class FloatExample(Float.Module): + def forward(self, x): + return x + + class QATExample(QAT.QATModule): + def forward(self, x): + return x + + @classmethod + def from_float_module(cls, float_module): + return cls() + + class Net(Float.Module): + def __init__(self): + super().__init__() + self.example = FloatExample() + + def forward(self, x): + return self.example(x) + + net = 
Net() + qat_net = quantize_qat(net, inplace=False, mapping={FloatExample: QATExample}) + assert isinstance(qat_net.example, QATExample) diff --git a/imperative/python/test/unit/quantization/test_fake_quant.py b/imperative/python/test/unit/quantization/test_fake_quant.py new file mode 100644 index 0000000000000000000000000000000000000000..ff999b75828b2eb8dbc3d4ae41779c8a198b457f --- /dev/null +++ b/imperative/python/test/unit/quantization/test_fake_quant.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np +import pytest + +import megengine as mge +from megengine import tensor +from megengine.quantization.fake_quant import TQT_Function +from megengine.quantization.internal_fake_quant import * +from megengine.test import assertTensorClose + + +class numpy_TQT_Function: + def __init__(self, lowerbound, upperbound): + super().__init__() + self.lowerbound = lowerbound + self.upperbound = upperbound + + def forward(self, inp, scale): + t = 2 ** scale + # t = F.maximum(t, 1e-4) + inp_scaled = inp / t + inp_clipped = np.maximum( + np.minimum(inp_scaled, self.upperbound), self.lowerbound + ) + inp_rounded = np.round(inp_clipped) + inp_flq = inp_rounded * t + self.saved_tensors = (inp_scaled, inp_rounded, t) + return inp_flq + + def backward(self, grad_inp_flq): + (inp_scaled, inp_rounded, t) = self.saved_tensors + mask_clip = (inp_scaled < -0.5 + self.lowerbound) + ( + inp_scaled > self.upperbound + 0.5 + ) # mask for accumulating the gradients of |data_scaled|>L + mask_quant = np.abs( + mask_clip - 1 + ) # mask for accumulating the gradients with |data_scaled|<=L + grad_quant = ( + grad_inp_flq * mask_quant * (inp_rounded - inp_scaled) + ) # gradient within |data_scaled|<=L + grad_clip = ( + grad_inp_flq * mask_clip * inp_rounded + ) # gradient with | data_scaled|>L + grad_s = grad_clip.sum() + grad_quant.sum() + # dL/ds = dL/dt * t * ln(2) + grad_s = grad_s * t * np.log(2) + grad_inp = grad_inp_flq * mask_quant + return grad_inp, grad_s + + +def test_TQT(): + f = TQT_Function(-127, 127) + nf = numpy_TQT_Function(-127, 127) + + def check_inp(a, b, c, a_np, b_np, c_np): + assertTensorClose( + f.forward(a, b).numpy(), nf.forward(a_np, b_np).astype("float32") + ) + c1, c2 = f.backward(c) + c1_np, c2_np = nf.backward(c_np) + assertTensorClose(c1.numpy(), c1_np.astype("float32")) + assertTensorClose(c2.numpy(), c2_np.astype("float32")) + + a_np = np.random.random((4, 3)).astype("float32") + b_np = np.random.random((1)).astype("float32") + a = tensor(a_np) + b = tensor(b_np) + check_inp(a, b, b, a_np, b_np, b_np) + + diff --git a/imperative/python/test/unit/test_autodiff.py b/imperative/python/test/unit/test_autodiff.py new file mode 100644 index 0000000000000000000000000000000000000000..929e967cae28069bc287dcaae5159160d4b61d2d --- /dev/null +++ b/imperative/python/test/unit/test_autodiff.py @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import weakref + +import numpy as np +import pytest + +import megengine as mge +import megengine.distributed as dist +from megengine.core._imperative_rt import TensorAttr, imperative +from megengine.core._imperative_rt.imperative import sync +from megengine.core.autodiff.grad import Grad +from megengine.core.ops.builtin import Elemwise +from megengine.core.tensor.raw_tensor import as_raw_tensor +from megengine.core.tensor.tensor import Tensor, apply +from megengine.core.tensor.tensor_wrapper import TensorWrapper +from megengine.functional.distributed import remote_recv, remote_send + + +def _elwise(mode): + op = Elemwise(mode=mode) + + def f(*args): + (result,) = apply(op, *args) + return result + + return f + + +add = _elwise("add") +mul = _elwise("mul") +cos = _elwise("cos") +relu = _elwise("relu") + + +def as_tensor(x): + return Tensor(as_raw_tensor(x, device=mge.device.get_default_device())) + + +def save_to(self, name="grad"): + def callback(tensor, grad): + setattr(self, name, grad) + + return callback + + +@pytest.mark.isolated_distributed +def test_dist_grad(): + world_size = 2 + x_np = np.random.rand(10).astype("float32") + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker0(): + dist.init_process_group("localhost", port, world_size, 0, 0) + mge.device.set_default_device("gpu0") + grad = Grad() + + x = as_tensor(x_np) + grad.wrt(x, callback=save_to(x)) + # need a placeholder to trace operator + send_x = remote_send(x, 1) + recv_x = remote_recv(1, x_np.shape, x_np.dtype, "gpu0") + y = recv_x * recv_x + + grad([y], [as_tensor(np.ones_like(x_np))]) + np.testing.assert_almost_equal(x.grad.numpy(), x.numpy() * 2) + + def worker1(): + dist.init_process_group("localhost", port, world_size, 1, 1) + mge.device.set_default_device("gpu1") + grad = Grad() + + recv_x = remote_recv(0, x_np.shape, x_np.dtype, "gpu1") + send_x = remote_send(recv_x, 0) + + grad([], []) + + # sync because grad has a send operator + sync() + send_x.device._cn._sync_all() + + import multiprocessing as mp + + p0 = mp.Process(target=worker0) + p1 = mp.Process(target=worker1) + p0.start() + p1.start() + p0.join(10) + p1.join(10) + assert p0.exitcode == 0 and p1.exitcode == 0 + + +def test_grad(): + x_np = np.random.rand(10).astype("float32") + x = as_tensor(x_np) + + grad = Grad().wrt(x, callback=save_to(x)) + + y = cos(x) + + grad(y, as_tensor(np.ones_like(x_np))) + np.testing.assert_almost_equal(x.grad.numpy(), -np.sin(x_np)) + + +def test_grad_2(): + x_np = np.random.rand(10).astype("float32") + x = as_tensor(x_np) + + grad = Grad().wrt(x, callback=save_to(x)) + + y = mul(x, x) + y = mul(y, y) + + grad(y, as_tensor(np.ones_like(x_np))) + np.testing.assert_almost_equal(x.grad.numpy(), 4 * x_np ** 3, decimal=6) + + +@pytest.mark.skip(reason="high order gradient was not implemented yet") +def test_2nd_grad(): + x_np = np.random.rand(10).astype("float32") + x = as_tensor(x_np) + ones = as_tensor(np.ones_like(x_np)) + + grad = Grad().wrt(x, callback=save_to(x)) + grad2 = Grad().wrt(x, callback=save_to(x)) + + y = cos(x) + + grad(y, ones) + np.testing.assert_almost_equal(x.grad.numpy(), -np.sin(x_np), decimal=5) + + grad2(x.grad, ones) + np.testing.assert_almost_equal(x.grad.numpy(), -np.cos(x_np)) + + +def test_grad_with_tensor_wrapper(): + x_np = 
np.random.rand(10).astype("float32") + x = TensorWrapper(x_np) + + grad = Grad().wrt(x, callback=save_to(x)) + + y = mul(x, x) + y = mul(y, y) + + grad(y, TensorWrapper(np.ones_like(x_np))) + np.testing.assert_almost_equal(x.grad.numpy(), 4 * x_np ** 3, decimal=6) + + +def test_grad_inplace(): + x_np = np.random.rand(10).astype("float32") + x = TensorWrapper(x_np) + + grad = Grad().wrt(x, callback=save_to(x)) + + y = mul(x, x) + y *= y + + grad(y, TensorWrapper(np.ones_like(x_np))) + np.testing.assert_almost_equal(x.grad.numpy(), 4 * x_np ** 3, decimal=6) + + +def test_elemwise_add(): + x_np = np.random.rand(10).astype("float32") + y_np = np.random.rand(10, 10).astype("float32") + dz_np = np.random.rand(10, 10).astype("float32") + x = TensorWrapper(x_np) + y = TensorWrapper(y_np) + dz = TensorWrapper(dz_np) + + refs = {} + + def f(x, y): + x = x * 2 + refs["x"] = weakref.ref(x.__wrapped__) + refs["y"] = weakref.ref(y.__wrapped__) + return x + y + + grad = Grad().wrt(x, callback=save_to(x)) + + z = f(x, y) + del y + + for k, r in refs.items(): + assert r() is None + + grad(z, dz) + np.testing.assert_almost_equal(x.grad.numpy(), dz_np.sum(0) * 2, decimal=5) + + +def test_elemwise_relu(): + x_np = [1.0, -1.0] + dz_np = [1.0] + x = TensorWrapper(x_np) + dz = TensorWrapper(dz_np) + + refs = {} + + def f(x): + x = x * 2 + refs["x"] = weakref.ref(x.__wrapped__) + return relu(x) + + grad = Grad().wrt(x, callback=save_to(x)) + + z = f(x) + + assert refs["x"]() is None + + grad(z, dz) + np.testing.assert_almost_equal(x.grad.numpy(), [2.0, 0]) + + +def test_elemwise_relu_backward_fn(): + op = Elemwise(mode="relu").to_c() + attr = TensorAttr() + attr.dtype = "float32" + attr.comp_node = "xpux" + result = imperative.make_backward_graph(op, [attr], [True], [True]) + backward_graph, save_for_backward_mask, input_has_grad = result + assert save_for_backward_mask == [False, True, True], save_for_backward_mask diff --git a/imperative/python/test/unit/test_distributed.py b/imperative/python/test/unit/test_distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..70692eb9f2aaead42c9918ce9cdc15084a1502ac --- /dev/null +++ b/imperative/python/test/unit/test_distributed.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
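+# Each test below spawns one worker process per rank, brings up a process
+# group on localhost, and runs its assertions inside the workers.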
+import multiprocessing as mp +import platform +import queue + +import pytest + +import megengine as mge +import megengine.distributed as dist + + +def _assert_q_empty(q): + try: + res = q.get(timeout=1) + except Exception as e: + assert isinstance(e, queue.Empty) + else: + assert False, "queue is not empty" + + +def _assert_q_val(q, val): + ret = q.get() + assert ret == val + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_init_process_group(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, backend): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank, backend) + assert dist.is_distributed() == True + assert dist.get_rank() == rank + assert dist.get_world_size() == world_size + assert dist.get_backend() == backend + + py_server_addr = dist.get_py_server_addr() + assert py_server_addr[0] == "localhost" + assert py_server_addr[1] == port + + mm_server_addr = dist.get_mm_server_addr() + assert mm_server_addr[0] == "localhost" + assert mm_server_addr[1] > 0 + + assert isinstance(dist.get_client(), dist.Client) + + def check(backend): + procs = [] + for rank in range(world_size): + p = mp.Process(target=worker, args=(rank, backend)) + p.start() + procs.append(p) + + for p in procs: + p.join(20) + assert p.exitcode == 0 + + check("nccl") + check("ucx") + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_new_group(): + world_size = 3 + ranks = [2, 0] + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + if rank in ranks: + group = dist.new_group(ranks) + assert group.size == 2 + assert group.key == "2,0" + assert group.rank == ranks.index(rank) + assert group.comp_node == "gpu{}:2".format(rank) + + procs = [] + for rank in range(world_size): + p = mp.Process(target=worker, args=(rank,)) + p.start() + procs.append(p) + + for p in procs: + p.join(20) + assert p.exitcode == 0 + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_group_barrier(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + def worker(rank, q): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + dist.group_barrier() + if rank == 0: + dist.group_barrier() + q.put(0) # to be observed in rank 1 + else: + _assert_q_empty(q) # q.put(0) is not executed in rank 0 + dist.group_barrier() + _assert_q_val(q, 0) # q.put(0) executed in rank 0 + + Q = mp.Queue() + procs = [] + for rank in range(world_size): + p = mp.Process(target=worker, args=(rank, Q)) + p.start() + procs.append(p) + + for p in procs: + p.join(20) + assert p.exitcode == 0 + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( 
+ platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_synchronized(): + world_size = 2 + port = dist.get_free_ports(1)[0] + server = dist.Server(port) + + @dist.synchronized + def func(rank, q): + q.put(rank) + + def worker(rank, q): + if mge.get_device_count("gpu") < world_size: + return + dist.init_process_group("localhost", port, world_size, rank, rank) + dist.group_barrier() + if rank == 0: + func(0, q) # q.put(0) + q.put(2) + else: + _assert_q_val(q, 0) # func executed in rank 0 + _assert_q_empty(q) # q.put(2) is not executed + func(1, q) + _assert_q_val( + q, 1 + ) # func in rank 1 executed earlier than q.put(2) in rank 0 + _assert_q_val(q, 2) # q.put(2) executed in rank 0 + + Q = mp.Queue() + procs = [] + for rank in range(world_size): + p = mp.Process(target=worker, args=(rank, Q)) + p.start() + procs.append(p) + + for p in procs: + p.join(20) + assert p.exitcode == 0 diff --git a/imperative/python/test/unit/test_function.py b/imperative/python/test/unit/test_function.py new file mode 100644 index 0000000000000000000000000000000000000000..8d46e26e23142fdc235b1ef26039f037153028a6 --- /dev/null +++ b/imperative/python/test/unit/test_function.py @@ -0,0 +1,128 @@ +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +import megengine.optimizer as optimizer +from megengine import Parameter +from megengine import Tensor as tensor +from megengine import tensor +from megengine.core.tensor.function import Function +from megengine.module import Module + + +def test_single_input(): + data_shape = (9, 2, 6) + av = np.random.random(data_shape).astype(np.float32) + + class MulFunc(Function): + def forward(self, a): + self.a = a + return a * 10 + + def backward(self, grad_o): + return grad_o * 10 + + class Simple(Module): + def __init__(self, a): + super().__init__() + self.a = Parameter(a, dtype=np.float32) + self.layer1 = MulFunc() + + def forward(self): + x = self.layer1(self.a) + return x + + net = Simple(av) + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + with optim.record(): + loss = net() + optim.backward(loss.sum()) + optim.step() + + np.testing.assert_almost_equal(loss.numpy(), (av * 10)) + np.testing.assert_almost_equal(net.a.numpy(), (av - 10)) + + +def test_multi_input(): + data_shape = (9, 2, 6) + av = np.random.random(data_shape).astype(np.float32) + bv = np.random.random(data_shape).astype(np.float32) + + class MulFunc(Function): + def forward(self, a, b): + self.a = a + self.b = b + return a * b + + def backward(self, grad_o): + return grad_o * self.b * 2, grad_o * self.a * 3 + + class Simple(Module): + def __init__(self, a, b): + super().__init__() + self.a = Parameter(a, dtype=np.float32) + self.b = Parameter(b, dtype=np.float32) + self.layer1 = MulFunc() + + def forward(self): + x = self.layer1(self.a, self.b) + return x + + net = Simple(av, bv) + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + with optim.record(): + loss = net() + optim.backward(loss.sum()) + optim.step() + + np.testing.assert_almost_equal(loss.numpy(), (av * bv)) + np.testing.assert_almost_equal(net.a.numpy(), (av - 2 * bv)) + 
np.testing.assert_almost_equal(net.b.numpy(), (bv - 3 * av)) + + +def test_multi_output(): + data_shape = (9, 2, 6) + av = np.random.random(data_shape).astype(np.float32) + bv = np.random.random(data_shape).astype(np.float32) + + class MulFunc(Function): + def forward(self, a, b): + self.a = a + self.b = b + return a * b, a + b + + def backward(self, grad_1, grad_2): + return grad_1 * (self.b + 1), grad_2 * (self.a + 1) + + class Simple(Module): + def __init__(self, a, b): + super().__init__() + self.a = Parameter(a, dtype=np.float32) + self.b = Parameter(b, dtype=np.float32) + self.layer1 = MulFunc() + + def forward(self): + x, y = self.layer1(self.a, self.b) + return x + y + + net = Simple(av, bv) + optim = optimizer.SGD(net.parameters(), lr=1.0) + optim.zero_grad() + + with optim.record(): + loss = net() + optim.backward(loss.sum()) + optim.step() + + np.testing.assert_almost_equal(loss.numpy(), (av * bv + av + bv), decimal=6) + np.testing.assert_almost_equal(net.a.numpy(), (av - bv - 1), decimal=6) + np.testing.assert_almost_equal(net.b.numpy(), (bv - av - 1), decimal=6) diff --git a/imperative/python/test/unit/test_imperative_rt.py b/imperative/python/test/unit/test_imperative_rt.py new file mode 100644 index 0000000000000000000000000000000000000000..959a08c4adb9c9e438886fe512b7886a0cfb25ca --- /dev/null +++ b/imperative/python/test/unit/test_imperative_rt.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
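+# Smoke tests for the low-level _imperative_rt bindings: operator node
+# config, builtin op equality, raw put/get/delete, and apply() on raw tensors.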
+import numpy as np
+import pytest
+
+import megengine.core.tensor.raw_tensor
+from megengine.core.tensor.core import apply
+
+
+def elemwise(*args, mode):
+    from megengine.core.ops.builtin import Elemwise
+    from megengine.core._imperative_rt.imperative import apply_op
+
+    return apply_op(Elemwise(mode=mode).to_c(), args)
+
+
+def test_basic_interface():
+    cf = megengine.core._imperative_rt.OperatorNodeConfig()
+    cf.name = "megengine.core"
+    cf.dtype = "float32"
+    cf.comp_node_arr = ["xpux"]
+    print(cf.name)
+    print(cf.dtype)
+    print(cf.comp_node_arr)
+    print(cf.comp_node)
+    cf.comp_node_arr = ["xpux", "xpux:1"]
+    with pytest.raises(ValueError):
+        cf.comp_node
+
+
+def test_opr_attr():
+    from megengine.core.ops.builtin import Elemwise
+
+    assert Elemwise(mode="add") == Elemwise(mode="add")
+
+
+def test_simple_arith():
+    x = np.random.rand(10).astype("float32")
+    xx = megengine.core._imperative_rt.put(x)
+    (yy,) = elemwise(xx, xx, mode="mul")
+    np.testing.assert_allclose(x * x, megengine.core._imperative_rt.get_value(yy))
+    megengine.core._imperative_rt.delete(xx)
+    megengine.core._imperative_rt.delete(yy)
+
+
+def test_tensor_on_device():
+    device = megengine.core._imperative_rt.CompNode("cpu0:1")
+    x = np.random.rand(10).astype("float32")
+    xx = megengine.core._imperative_rt.put(x, device=device)
+    assert str(megengine.core._imperative_rt.get_device(xx)) == "cpu0:1"
+    np.testing.assert_equal(x, megengine.core._imperative_rt.get_value(xx))
+    megengine.core._imperative_rt.delete(xx)
+
+
+def test_raw_tensor():
+    from megengine.core.tensor.raw_tensor import as_raw_tensor
+    from megengine.core.ops.builtin import Elemwise
+
+    x = np.random.rand(10).astype("float32")
+    xx = as_raw_tensor(x)
+    (yy,) = apply(Elemwise(mode="mul"), xx, xx)
+    np.testing.assert_allclose(x * x, yy.numpy())
+    (yy,) = apply(Elemwise(mode="mul"), xx, xx)
+    np.testing.assert_allclose(x * x, yy.numpy())
diff --git a/imperative/python/test/unit/test_indexing_op.py b/imperative/python/test/unit/test_indexing_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..70b2911f046883eca5d2fbd96b44b1191034ea1f
--- /dev/null
+++ b/imperative/python/test/unit/test_indexing_op.py
@@ -0,0 +1,546 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
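+# The helpers below rebuild the old graph-mode shape-descriptor and getitem
+# canonicalization on top of raw tensors, so that each indexing op
+# (Subtensor, IndexingMultiAxisVec, MeshIndexing, ...) can be invoked
+# directly and checked against the equivalent numpy indexing expression.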
+import collections
+
+import numpy as np
+import pytest
+
+import megengine.core.ops.builtin
+import megengine.core.tensor.raw_tensor
+from megengine.core.ops._internal import all_ops
+from megengine.core.tensor import Tensor
+from megengine.core.tensor.core import apply
+from megengine.core.tensor.raw_tensor import RawTensor, as_raw_tensor
+
+
+def cvt_to_shape_desc(val, inpvar, config=None):
+    def as_tensor(val, device):
+        assert device is not None, "can not infer device"
+        # TODO: should copy to appropriate device
+        val = as_raw_tensor(val, device=device)
+        return val
+
+    device = None
+    if inpvar is not None:
+        assert isinstance(inpvar, RawTensor)
+        device = device or inpvar.device
+
+    if config is not None:
+        device = device or config.device
+
+    if isinstance(val, RawTensor):
+        return as_tensor(val, device)
+
+    if not isinstance(val, collections.abc.Iterable):
+        val = [val]
+
+    components = []
+    on_host = True
+    for i in val:
+        if isinstance(i, RawTensor):
+            on_host = False
+            device = device or i.device
+        else:
+            assert isinstance(i, int), (
+                "shape desc could contain either int or Tensor, got {}"
+                " actually".format(repr(i))
+            )
+        components.append(i)
+    assert components, "shape desc could not be empty"
+
+    if on_host:
+        shape = np.ascontiguousarray(components, dtype=np.int32)
+        assert np.all(shape == components), "failed to convert to shape: {}".format(
+            components
+        )
+        return as_tensor(shape, device)
+
+    for idx, v in enumerate(components):
+        if not isinstance(v, RawTensor):
+            vi = int(v)
+            assert vi == v, "could not convert {} to int".format(v)
+            v = vi
+            components[idx] = as_tensor(v, device)
+
+    return invoke_op(all_ops.Concat(axis=0), components)
+
+
+def canonize_reshape(inputs, *, config):
+    src, tshape = inputs
+    tshape = cvt_to_shape_desc(tshape, src, config)
+    return src, tshape
+
+
+def canonize_inputs(inputs, *, config):
+    """convert immediate numbers and SharedND to SymbolVar in inputs; at least
+    one of the inputs must be SymbolVar, so comp node and comp graph can
+    be inferred
+
+    :return: list of converted vars
+    """
+
+    if (
+        isinstance(inputs, (list, tuple))
+        and len(inputs) == 1
+        and isinstance(inputs[0], (list, tuple))
+    ):
+        # handle the case when a list is passed to a function with
+        # variable-length argument (e.g. 
concat has signature concat(*inputs) + # and is called with concat([a, b])) + inputs = inputs[0] + + if isinstance(inputs, RawTensor): + return [inputs] + + old_inputs = inputs + inputs = [] + get_comp_node = None + need_cvt = False + for i in old_inputs: + if isinstance(i, RawTensor): + get_comp_node = lambda cn=i.device.to_c(): cn + else: + need_cvt = True + inputs.append(i) + if not need_cvt: + return inputs + + if get_comp_node is None: + + def get_comp_node(): + return config.comp_node + + for idx, var in enumerate(inputs): + if not isinstance(var, RawTensor): + var = as_raw_tensor(var) + inputs[idx] = var + return inputs + + +def invoke_op(op, inputs_, cvt_inputs=canonize_inputs): + inputs = cvt_inputs( + inputs_, config=megengine.core._imperative_rt.OperatorNodeConfig() + ) + return apply(op, *inputs) + + +def unpack_getitem(inp, tuple_val, *, allow_newaxis=True): + assert isinstance(inp, RawTensor) + if not isinstance(tuple_val, tuple): + tuple_val = (tuple_val,) + + def as_tensor(v): + if not isinstance(v, RawTensor): + vi = np.ascontiguousarray(v, dtype=np.int32) + assert np.abs(vi - v).max() == 0, "bad index: {!r}".format(v) + v = as_raw_tensor(vi) + return v + + new_axes = [] + tensors = [] + items = [] + cur_axis = -1 + for i_idx, i in enumerate(tuple_val): + cur_axis += 1 + if i is np.newaxis: + if cur_axis >= 0: + new_axes.append(cur_axis) + continue + + if i is Ellipsis: + cur_axis = -1 + for j in tuple_val[:i_idx:-1]: + if j is Ellipsis: + raise IndexError("only one ellipsis is allowed") + if j is np.newaxis: + new_axes.append(cur_axis) + cur_axis -= 1 + continue + + item = [ + cur_axis, + ] + + def push(v, item, tensors): + if v is None: + item.append(False) + else: + item.append(True) + tensors.append(as_tensor(v)) + + if isinstance(i, slice): + if i.start is None and i.stop is None and i.step is None: + continue + push(i.start, item, tensors) + push(i.stop, item, tensors) + push(i.step, item, tensors) + item.append(False) # idx + else: + item += [False,] * 3 # begin, end, stop + push(i, item, tensors) + assert len(item) == 5 + items.append(item) + if new_axes: + raise IndexError("newaxis is not allowed here") + return inp, tensors, items + + +def dimshuffle(*args, **kwargs): + op = all_ops.Dimshuffle(**kwargs).to_c() + return invoke_op(op, args) + + +def broadcast(input, tshape): + op = all_ops.Broadcast().to_c() + return invoke_op(op, (input, tshape), canonize_reshape) + + +def subtensor(input, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.Subtensor(items).to_c() + return invoke_op(op, (input, *tensors)) + + +def set_subtensor(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.SetSubtensor(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def incr_subtensor(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.IncrSubtensor(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def advance_indexing(input, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.IndexingMultiAxisVec(items).to_c() + return invoke_op(op, (input, *tensors)) + + +def set_advance_indexing(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.IndexingSetMultiAxisVec(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def incr_advance_indexing(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = 
all_ops.IndexingIncrMultiAxisVec(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def mesh_indexing(input, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.MeshIndexing(items).to_c() + return invoke_op(op, (input, *tensors)) + + +def set_mesh_indexing(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.SetMeshIndexing(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def incr_mesh_indexing(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.IncrMeshIndexing(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def batched_mesh_indexing(input, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.BatchedMeshIndexing(items).to_c() + return invoke_op(op, (input, *tensors)) + + +def batched_set_mesh_indexing(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.BatchedSetMeshIndexing(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def batched_incr_mesh_indexing(input, value, tuple_val): + input, tensors, items = unpack_getitem(input, tuple_val) + op = all_ops.BatchedIncrMeshIndexing(items).to_c() + return invoke_op(op, (input, value, *tensors)) + + +def test_dimshuffle(): + x = np.arange(10).reshape(2, 5).astype("int32") + xx = as_raw_tensor(x) + (yy,) = dimshuffle(xx, pattern="1x0") + np.testing.assert_equal(np.expand_dims(x.transpose(), axis=1), yy.numpy()) + + +def test_broadcast(): + x = np.arange(10).reshape(1, 10).astype("int32") + xx = as_raw_tensor(x) + (yy,) = broadcast(xx, (10, 10)) + np.testing.assert_equal(np.repeat(x, 10, 0), yy.numpy()) + + +def test_subtensor(): + x = np.arange(25).reshape(5, 5).astype("int32") + d = np.arange(2).astype("int32") + xx = as_raw_tensor(x) + (yy0,) = subtensor(xx, (slice(0, 4, 2), 3)) + (yy1,) = set_subtensor(xx, d, (slice(0, 4, 2), 3)) + (yy2,) = incr_subtensor(xx, d, (slice(0, 4, 2), 3)) + + np.testing.assert_equal(x[0:4:2, 3], yy0.numpy()) + + x_ = x.copy() + x_[0:4:2, 3] = d + np.testing.assert_equal(x_, yy1.numpy()) + + x_ = x.copy() + x_[0:4:2, 3] += d + np.testing.assert_equal(x_, yy2.numpy()) + + +def test_advance_indexing(): + x = np.arange(25).reshape(5, 5).astype("int32") + d = np.arange(15).reshape(3, 5).astype("int32") + xx = as_raw_tensor(x) + (yy0,) = advance_indexing(xx, ((0, 4, 2), slice(None, None, None))) + (yy1,) = set_advance_indexing(xx, d, ((0, 4, 2), slice(None, None, None))) + (yy2,) = incr_advance_indexing(xx, d, ((0, 4, 2), slice(None, None, None))) + + np.testing.assert_equal(x[(0, 4, 2), :], yy0.numpy()) + + x_ = x.copy() + x_[(0, 4, 2), :] = d + np.testing.assert_equal(x_, yy1.numpy()) + + x_ = x.copy() + x_[(0, 4, 2), :] += d + np.testing.assert_equal(x_, yy2.numpy()) + + +def test_mesh_indexing(): + x = np.arange(25).reshape(5, 5).astype("int32") + d = np.arange(6).reshape(3, 2).astype("int32") + xx = as_raw_tensor(x) + (yy0,) = mesh_indexing(xx, (slice(0, 5, 2), (1, 3))) + (yy1,) = set_mesh_indexing(xx, d, (slice(0, 5, 2), (1, 3))) + (yy2,) = incr_mesh_indexing(xx, d, (slice(0, 5, 2), (1, 3))) + + r = np.ndarray(shape=(3, 2), dtype="int32") + for i0, i1 in enumerate(range(0, 5, 2)): + for j0, j1 in enumerate((1, 3)): + r[i0, j0] = x[i1, j1] + np.testing.assert_equal(r, yy0.numpy()) + + r = x.copy() + for i0, i1 in enumerate(range(0, 5, 2)): + for j0, j1 in enumerate((1, 3)): + r[i1, j1] = d[i0, j0] + np.testing.assert_equal(r, yy1.numpy()) 
+ + r = x.copy() + for i0, i1 in enumerate(range(0, 5, 2)): + for j0, j1 in enumerate((1, 3)): + r[i1, j1] += d[i0, j0] + np.testing.assert_equal(r, yy2.numpy()) + + +def test_batched_mesh_indexing(): + x = np.arange(24).reshape(2, 3, 4).astype("int32") + d = np.arange(12).reshape(2, 2, 3).astype("int32") + xx = as_raw_tensor(x) + s = [(0, 1, 2), (1, 2, 3)] + (yy0,) = batched_mesh_indexing(xx, (slice(None, None, None), [(0, 2)] * 2, s)) + (yy1,) = batched_set_mesh_indexing( + xx, d, (slice(None, None, None), [(0, 2)] * 2, s) + ) + (yy2,) = batched_incr_mesh_indexing( + xx, d, (slice(None, None, None), [(0, 2)] * 2, s) + ) + + r = np.ndarray(shape=(2, 2, 3), dtype="int32") + for i in range(2): + for j0, j1 in enumerate((0, 2)): + for k0, k1 in enumerate(s[i]): + r[i, j0, k0] = x[i, j1, k1] + np.testing.assert_equal(r, yy0.numpy()) + + r = x.copy() + for i in range(2): + for j0, j1 in enumerate((0, 2)): + for k0, k1 in enumerate(s[i]): + r[i, j1, k1] = d[i, j0, k0] + np.testing.assert_equal(r, yy1.numpy()) + + r = x.copy() + for i in range(2): + for j0, j1 in enumerate((0, 2)): + for k0, k1 in enumerate(s[i]): + r[i, j1, k1] += d[i, j0, k0] + np.testing.assert_equal(r, yy2.numpy()) + + +# high level + + +def test_advance_indexing_high_level(): + x = np.arange(25).reshape(5, 5).astype("int32") + d = np.arange(15).reshape(3, 5).astype("int32") + xx = Tensor(x) + + np.testing.assert_equal(x[1, :], xx[1, :].numpy()) + np.testing.assert_equal(x[:, 1], xx[:, 1].numpy()) + np.testing.assert_equal(x[1:3, :], xx[1:3, :].numpy()) + + np.testing.assert_equal(x[:, :], xx[:, :].numpy()) + np.testing.assert_equal(x[1, 1], xx[1, 1].numpy()) + yy = xx[(0, 4, 2), :] + np.testing.assert_equal(x[(0, 4, 2), :], yy.numpy()) + + x_ = x.copy() + x_[(0, 4, 2), :] = d + xx_ = Tensor(xx) + xx_[(0, 4, 2), :] = d + np.testing.assert_equal(x_, xx_.numpy()) + + x = np.arange(27).reshape(3, 3, 3).astype("int32") + xx = Tensor(x) + + np.testing.assert_equal(x[1, :, :], xx[1, :, :].numpy()) + np.testing.assert_equal(x[1, :, 1], xx[1, :, 1].numpy()) + np.testing.assert_equal(x[1, 0:1, :], xx[1, 0:1, :].numpy()) + np.testing.assert_equal(x[0:1, 1, 1], xx[0:1, 1, 1].numpy()) + np.testing.assert_equal(x[:, 1, 1], xx[:, 1, 1].numpy()) + np.testing.assert_equal(x[:, 1], xx[:, 1].numpy()) + np.testing.assert_equal(x[1, 1:2], xx[1, 1:2].numpy()) + + x_ = x.copy() + x_[1, 1, 1] = -1 + xx[1, 1, 1] = -1 + np.testing.assert_equal(x_, xx.numpy()) + + x_[:, 1, 1] = -2 + xx[:, 1, 1] = x_[:, 1, 1] + np.testing.assert_equal(x_, xx.numpy()) + + x_[0:1, :, 1] = -3 + xx[0:1, :, 1] = x_[0:1, :, 1] + np.testing.assert_equal(x_, xx.numpy()) + + x_[0:1, :, 1] = -4 + y = Tensor(x_) + xx[0:1, :, 1] = y[0:1, :, 1] + np.testing.assert_equal(y.numpy(), xx.numpy()) + + x[:] = 1 + xx[:] = 1 + np.testing.assert_equal(x, xx.numpy()) + + x = np.arange(9).reshape(3, 3).astype("int32") + xx = Tensor(x) + y = np.array([1, 2]) + yy = Tensor(y) + np.testing.assert_equal(x[:, y[0]], xx[:, y[0]].numpy()) + # np.testing.assert_equal(x[:, y[0]], xx[:, yy[0]].numpy()) # FIXME + np.testing.assert_equal(x[:, y], xx[:, y].numpy()) + np.testing.assert_equal(x[:, y], xx[:, yy].numpy()) + + x_ = x.copy() + x_[:, y[0]] = -1 + xx_ = Tensor(x_) + xx[:, yy[0]] = xx_[:, yy[0]] + np.testing.assert_equal(x_, xx.numpy()) + + x_[:, y] = -1 + xx_ = Tensor(x_) + xx[:, yy] = xx_[:, yy] + np.testing.assert_equal(x_, xx.numpy()) + + x = np.arange(9).reshape(3, 3).astype("int32") + xx = Tensor(x) + y = np.array([1]) + yy = Tensor(y) + np.testing.assert_equal(x[:, y[0]], xx[:, 
y[0]].numpy()) + # np.testing.assert_equal(x[:, y[0]], xx[:, yy[0]].numpy()) # FIXME + np.testing.assert_equal(x[:, y], xx[:, y].numpy()) + + # XXX: no way to tell whether yy is scalar or ndim=1 array + np.testing.assert_equal(x[:, y], xx[:, yy].numpy()) + + x = np.arange(9).reshape(3, 3).astype("int32") + xx = Tensor(x) + np.testing.assert_equal(x[[0, 1], 0], xx[[0, 1], 0].numpy()) + np.testing.assert_equal(x[0:2, 0], xx[0:2, 0].numpy()) + + +def test_advance_indexing_with_bool(): + a = np.arange(9).reshape(3, 3).astype(np.float32) + b = np.array([1, 2, 3]) + c = np.array([1, 2, 3]) + aa = Tensor(a) + bb = Tensor(b) + cc = Tensor(c) + np.testing.assert_equal(a[b == 1, c == 2], aa[bb == 1, cc == 2].numpy()) + a[b == 1, c == 2] = -1.0 + aa[bb == 1, cc == 2] = -1.0 + np.testing.assert_equal(a, aa.numpy()) + + a = np.arange(9).reshape(3, 3).astype(np.float32) + b = np.array([False, True, True]) + c = np.array([2, 0]).astype(np.int32) + aa = Tensor(a) + bb = Tensor(b) + cc = Tensor(c) + np.testing.assert_equal(a[b, c], aa[bb, cc].numpy()) + a[b, c] = -1.0 + aa[bb, cc] = -1.0 + np.testing.assert_equal(a, aa.numpy()) + d = np.array([-1, -2], dtype=np.float32) + dd = Tensor(d) + a[b, c] = d + aa[bb, cc] = dd + np.testing.assert_equal(a, aa.numpy()) + + a = np.ones((2, 2)) + b = np.array([[True, False], [False, True]]) + aa = Tensor(a) + bb = Tensor(b) + np.testing.assert_equal(a[b], aa[bb].numpy()) + b[:] = True + bb[:] = True + np.testing.assert_equal(a[b], aa[bb].numpy()) + np.testing.assert_equal(a[:, [True, False]], aa[:, [True, False]].numpy()) + + a = np.ones((2, 2), dtype=np.int32) + b = np.array([[False, False], [False, False]]) + aa = Tensor(a) + bb = Tensor(b) + np.testing.assert_equal(a[b], aa[bb].numpy()) + + b = np.array([False, False]) + bb = Tensor(b) + np.testing.assert_equal(a[b], aa[bb].numpy().reshape(a[b].shape)) # FIXME + + a = np.arange(576).reshape(2, 3, 4, 3, 4, 2).astype("int32") + aa = Tensor(a) + + b = (np.random.sample((2, 3, 4)) > 0.5).astype("bool") + bb = Tensor(b) + np.testing.assert_equal(a[b, :, 0:4:2], aa[bb, :, 0:4:2].numpy()) + + b = (np.random.sample((4, 3, 4)) > 0.5).astype("bool") + bb = Tensor(b) + np.testing.assert_equal(a[..., b, 0:2], aa[..., bb, 0:2].numpy()) + + b = (np.random.sample((3, 4, 3)) > 0.5).astype("bool") + bb = Tensor(b) + np.testing.assert_equal( + a[:, b, 0:2, [True, False]], aa[:, bb, 0:2, [True, False]].numpy() + ) diff --git a/imperative/python/test/unit/test_jit.py b/imperative/python/test/unit/test_jit.py new file mode 100644 index 0000000000000000000000000000000000000000..4bc9c2f17dc612a02480c802d289693cc71906e9 --- /dev/null +++ b/imperative/python/test/unit/test_jit.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
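+# A sketch of the behavior test_1 below asserts (hypothetical API: the
+# `function` decorator is under rewrite, hence the commented-out import and
+# the skip marker). Tracing is expected to specialize on the plain-Python
+# argument `p`, compiling one graph per distinct value, so both branches
+# stay reachable across repeated calls:
+#
+#     f(Tensor(0), 0)  # (0 + 1) * 2 == 2
+#     f(Tensor(0), 1)  # (0 + 1) * (0 + 1) == 1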
+import pytest
+
+from megengine.core import Tensor
+
+# from megengine.core.interpreter.hints import function
+
+
+@pytest.mark.skip(reason="under rewrite")
+def test_1():
+    @function
+    def f(x, p):
+        x = x + 1
+        if p:
+            return x * x
+        return x * 2
+
+    x = Tensor(0)
+
+    for _ in range(5):
+        assert f(x, 0).numpy() == 2
+        assert f(x, 1).numpy() == 1
diff --git a/imperative/python/test/unit/test_loss.py b/imperative/python/test/unit/test_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4abbd682fcc47e31f6e8a8cdb118d8b1d4ccbeb
--- /dev/null
+++ b/imperative/python/test/unit/test_loss.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import numpy as np
+
+import megengine.functional as F
+from megengine import tensor
+
+
+# XXX need to test label_smooth
+def test_cross_entropy_with_softmax():
+    data = tensor([1, 100]).astype(np.float32).reshape((1, 2))
+    label = tensor([1]).astype(np.int32)
+    loss = F.cross_entropy_with_softmax(data, label)
+    np.testing.assert_allclose(loss.numpy(), 0.0)
diff --git a/imperative/python/test/unit/test_megbrain_graph.py b/imperative/python/test/unit/test_megbrain_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fb6a9de921947bfeceaec9dbc7c8ff993c04d4d
--- /dev/null
+++ b/imperative/python/test/unit/test_megbrain_graph.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
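+# The tests below share one pattern: feed a host value in through
+# `input_callback`, capture the result in a `concurrent.futures.Future` via
+# `output_callback`, then compile and run the graph. A minimal sketch of the
+# round trip (same calls as test_io; the helper name is illustrative):
+#
+#     def run_identity(g, x):
+#         v, _ = mgb_graph.input_callback(
+#             lambda: x, device=x.comp_node, dtype=x.dtype, graph=g)
+#         y = Future()
+#         g.compile(mgb_graph.output_callback(y.set_result, v))()
+#         return y.result()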
+from concurrent.futures import Future + +import numpy as np + +import megengine.functional as F +from megengine.core._imperative_rt import DeviceTensorND +from megengine.core.tensor import megbrain_graph as mgb_graph +from megengine.core.tensor.raw_tensor import as_raw_tensor + + +def make_dev_tensor(value, dtype=None, device=None): + return as_raw_tensor(value, dtype=dtype, device=device)._dev_tensor() + + +def test_io(): + g = mgb_graph.Graph() + x = make_dev_tensor(np.random.randn(3).astype("float32"), device="xpux") + vx, _ = mgb_graph.input_callback( + lambda: x, device=x.comp_node, dtype=x.dtype, graph=g + ) + y = Future() + v = mgb_graph.output_callback(y.set_result, vx) + f = g.compile(v) + f() + + np.testing.assert_equal(x.numpy(), y.result().numpy()) + + +def test_io2(): + g = mgb_graph.Graph() + g.options.async_exec_level = 0b100 + dtype, device = "float32", "xpux" + px = mgb_graph.InputNode(device=device, dtype=dtype, graph=g) + py = mgb_graph.OutputNode(px.outputs[0]) + f = g.compile(py.outputs[0]) + + for _ in range(3): + f.execute() + x = make_dev_tensor(np.random.randn(10).astype(dtype), device=device) + px.set_value(x) + y = py.get_value() + np.testing.assert_equal(x.numpy(), y.numpy()) + f.wait() + + +def test_attr_output(): + g = mgb_graph.Graph() + g.options.async_exec_level = 0b100 + dtype, device = "float32", "xpux" + px = mgb_graph.InputNode(device=device, dtype=dtype, graph=g) + py = mgb_graph.AttrOutputNode(px.outputs[0]) + f = g.compile(py.outputs[0]) + + for shape in [(2,), (3,), (5,)]: + f.execute() + x = make_dev_tensor(np.random.randn(*shape).astype(dtype), device=device) + px.set_value(x) + ay = py.get_value() + assert ay.shape == shape + assert ay.dtype == np.dtype(dtype) + assert ay.device == device + f.wait() + + +def test_op(): + g = mgb_graph.Graph() + x = make_dev_tensor(np.random.randn(10).astype("float32"), device="xpux") + v, _ = mgb_graph.input_callback( + lambda: x, device=x.comp_node, dtype=x.dtype, graph=g + ) + v = F.neg(v) + y = Future() + v = mgb_graph.output_callback(y.set_result, v) + f = g.compile(v) + f() + + np.testing.assert_equal(x.numpy(), -y.result().numpy()) diff --git a/imperative/python/test/unit/test_module.py b/imperative/python/test/unit/test_module.py new file mode 100644 index 0000000000000000000000000000000000000000..5de497ed873eee5830f5dfec87efafae4c4148e0 --- /dev/null +++ b/imperative/python/test/unit/test_module.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
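+# test_syncbn below builds its expected outputs in NumPy: statistics are
+# taken over the full batch (all ranks together), then each rank checks its
+# slice. The transpose/reshape in the test body is equivalent to reducing
+# over every axis except the channel axis:
+#
+#     mean = x.mean(axis=(0, 2, 3), keepdims=True)
+#     var_biased = x.var(axis=(0, 2, 3), keepdims=True)
+#     yv_expect = (x - mean) / np.sqrt(var_biased + eps)
+#     running_mean = running_mean * momentum + mean * (1 - momentum)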
+import platform + +import pytest + + +@pytest.mark.skipif( + platform.system() == "Darwin", reason="do not imp GPU mode at macos now" +) +@pytest.mark.skipif( + platform.system() == "Windows", reason="do not imp GPU mode at Windows now" +) +@pytest.mark.isolated_distributed +def test_syncbn(): + import numpy as np + import multiprocessing as mp + from megengine.distributed.group import Server + + nr_chan = 8 + nr_ranks = 4 + data_shape = (3, nr_chan, 4, nr_ranks * 8) + momentum = 0.9 + eps = 1e-5 + running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32) + running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32) + steps = 4 + server = Server(0) + port = server.py_server_port + + def worker(rank, data, yv_expect, running_mean, running_var): + import megengine as mge + import megengine.distributed as dist + from megengine import tensor + from megengine.module import SyncBatchNorm + from megengine.distributed.group import Group + from megengine.test import assertTensorClose + + if mge.get_device_count("gpu") < nr_ranks: + return + dist.init_process_group("localhost", port, nr_ranks, rank, rank) + group = Group([i for i in range(nr_ranks)]) + bn = SyncBatchNorm(nr_chan, eps=eps, momentum=momentum, group=group) + data_tensor = None + for i in range(steps): + if data_tensor is None: + data_tensor = tensor(data[i], device=f"gpu{rank}:0") + else: + data_tensor.set_value(data[i]) + yv = bn(data_tensor) + + assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) + assertTensorClose(running_mean, bn.running_mean.numpy(), max_err=5e-6) + assertTensorClose(running_var, bn.running_var.numpy(), max_err=5e-6) + + xv = [] + for i in range(steps): + xv.append(np.random.normal(loc=2.3, size=data_shape).astype(np.float32)) + xv_transposed = np.transpose(xv[i], [0, 2, 3, 1]).reshape( + (data_shape[0] * data_shape[2] * data_shape[3], nr_chan) + ) + + mean = np.mean(xv_transposed, axis=0).reshape(1, nr_chan, 1, 1) + + var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1)) + sd = np.sqrt(var_biased + eps) + + var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape((1, nr_chan, 1, 1)) + running_mean = running_mean * momentum + mean * (1 - momentum) + running_var = running_var * momentum + var_unbiased * (1 - momentum) + + yv_expect = (xv[i] - mean) / sd + + data = [] + for i in range(nr_ranks): + data.append([]) + for j in range(steps): + data[i].append(xv[j][:, :, :, i * 8 : i * 8 + 8]) + + procs = [] + for rank in range(nr_ranks): + p = mp.Process( + target=worker, + args=( + rank, + data[rank], + yv_expect[:, :, :, rank * 8 : rank * 8 + 8], + running_mean, + running_var, + ), + ) + p.start() + procs.append(p) + for p in procs: + p.join(10) + assert p.exitcode == 0 + + +def test_module_conv2d(): + from megengine.module.conv import Conv2d + + conv = Conv2d(2, 3, 1) diff --git a/imperative/python/test/unit/test_raw_tensor.py b/imperative/python/test/unit/test_raw_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..0f4ae7ec53a472fa86798142683c07b6fdc17a40 --- /dev/null +++ b/imperative/python/test/unit/test_raw_tensor.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
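+# `as_raw_tensor` wraps a host ndarray as an imperative runtime tensor; an
+# explicit `dtype` converts on the way in (the int64 test below relies on
+# this), and device "xpux" means "whichever device is available". The NumPy
+# equivalence both tests check, in one line (illustrative):
+#
+#     F.add(as_raw_tensor(x, dtype="float32", device="xpux"), 1).numpy()
+#     # == x.astype("float32") + 1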
+import numpy as np + +import megengine.functional as F +from megengine.core.tensor.raw_tensor import as_raw_tensor + + +def test_as_raw_tensor(): + x = np.arange(6, dtype="float32").reshape(2, 3) + xx = as_raw_tensor(x, device="xpux") + yy = F.add(xx, 1).numpy() + assert xx.dtype == np.float32 + assert xx.device == "xpux" + np.testing.assert_almost_equal(yy, x + 1) + + +def test_as_raw_tensor_from_int64(): + x = np.arange(6, dtype="int64").reshape(2, 3) + xx = as_raw_tensor(x, dtype="float32", device="xpux") + yy = F.add(xx, 1).numpy() + assert xx.dtype == np.float32 + assert xx.device == "xpux" + np.testing.assert_almost_equal(yy, x.astype("float32") + 1) diff --git a/imperative/python/test/unit/test_serialization.py b/imperative/python/test/unit/test_serialization.py new file mode 100644 index 0000000000000000000000000000000000000000..5fa19bd4b5a3def2c89736e2c0fa5b717d32c1b9 --- /dev/null +++ b/imperative/python/test/unit/test_serialization.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import pickle +from tempfile import TemporaryFile + +import numpy as np + +import megengine as mge +from megengine import Buffer, Parameter, tensor + + +def test_tensor_serialization(): + def tensor_eq(a, b): + assert a.dtype == b.dtype + assert a.device == b.device + assert a.requires_grad == b.requires_grad + np.testing.assert_equal(a.numpy(), b.numpy()) + + with TemporaryFile() as f: + data = np.random.randint(low=0, high=7, size=[233]) + a = tensor(data, device="xpux", dtype=np.int32) + pickle.dump(a, f) + f.seek(0) + b = pickle.load(f) + np.testing.assert_equal(a.numpy(), b.numpy()) + + with TemporaryFile() as f: + a = Parameter(np.random.random(size=(233, 2)).astype(np.float32)) + pickle.dump(a, f) + f.seek(0) + b = pickle.load(f) + assert isinstance(b, Parameter) + np.testing.assert_equal(a.numpy(), b.numpy()) + + with TemporaryFile() as f: + a = Buffer(np.random.random(size=(2, 233)).astype(np.float32)) + pickle.dump(a, f) + f.seek(0) + b = pickle.load(f) + assert isinstance(b, Buffer) + np.testing.assert_equal(a.numpy(), b.numpy()) + + with TemporaryFile() as f: + a = Buffer(np.random.random(size=(2, 233)).astype(np.float32)) + mge.save(a, f) + f.seek(0) + b = mge.load(f, map_location="cpux") + assert isinstance(b, Buffer) + assert "cpu" in str(b.device) + np.testing.assert_equal(a.numpy(), b.numpy()) + + with TemporaryFile() as f: + if mge.is_cuda_available(): + device_org = mge.get_default_device() + a = Buffer(np.random.random(size=(2, 233)).astype(np.float32)) + mge.save(a, f) + f.seek(0) + mge.set_default_device("cpux") + b = mge.load(f, map_location={"gpu0": "cpu0"}) + assert isinstance(b, Buffer) + assert "cpu0" in str(b.device) + np.testing.assert_equal(a.numpy(), b.numpy()) + mge.set_default_device(device_org) diff --git a/imperative/python/test/unit/test_tensor_wrapper.py b/imperative/python/test/unit/test_tensor_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..92dc1c255fd967b9391b86dc5ab0689b244370ad --- /dev/null +++ b/imperative/python/test/unit/test_tensor_wrapper.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# 
Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +from megengine.core.tensor.tensor_wrapper import TensorWrapper + + +def test_basic(): + x_np = np.random.rand(10).astype("float32") + x = TensorWrapper(x_np) + y = x * x + y_np = y.numpy() + np.testing.assert_almost_equal(y_np, x_np * x_np) + + +def test_literal_arith(): + x_np = np.random.rand(10).astype("float32") + x = TensorWrapper(x_np) + y = x * 2 + y_np = y.numpy() + np.testing.assert_almost_equal(y_np, x_np * 2) + + +def test_matmul(): + A = TensorWrapper(np.random.rand(5, 7).astype("float32")) + B = TensorWrapper(np.random.rand(7, 10).astype("float32")) + C = A @ B + np.testing.assert_almost_equal(C.numpy(), A.numpy() @ B.numpy(), decimal=6) + + +def test_reduce(): + for m in ["sum", "prod", "min", "max", "mean"]: + x_np = np.random.rand(10).astype("float32") + x = TensorWrapper(x_np) + y = getattr(x, m)(-1) + np.testing.assert_almost_equal(y.numpy(), getattr(x_np, m)(-1), decimal=6) + + +def test_set_subtensor(): + x = TensorWrapper([1, 2, 3]) + x[:] = [1, 1, 1] + np.testing.assert_almost_equal(x.numpy(), [1, 1, 1], decimal=6) + x[[0, 2]] = [3, 2] + np.testing.assert_almost_equal(x.numpy(), [3, 1, 2], decimal=6) + x[1:3] = [4, 5] + np.testing.assert_almost_equal(x.numpy(), [3, 4, 5], decimal=6) diff --git a/imperative/python/test/unit/test_util.py b/imperative/python/test/unit/test_util.py new file mode 100644 index 0000000000000000000000000000000000000000..414dbd21137d9d6c66096fa61bd83617d0b876ad --- /dev/null +++ b/imperative/python/test/unit/test_util.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from megengine.core._imperative_rt import Logger + + +def test_logger(): + orig_level = Logger().set_log_level(Logger.LogLevel.Info) + assert Logger().set_log_level(Logger.LogLevel.Info) == Logger.LogLevel.Info + Logger().set_log_level(orig_level) diff --git a/imperative/python/tools/gen_op_defs.py b/imperative/python/tools/gen_op_defs.py new file mode 100755 index 0000000000000000000000000000000000000000..e892a0f5d34c66a2e7853d3a0190d479d2389307 --- /dev/null +++ b/imperative/python/tools/gen_op_defs.py @@ -0,0 +1,504 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
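+# How this generator is driven (per main() at the bottom of this file): the
+# concatenated param-definition scripts are exec()ed with `pdef` bound to
+# ParamDef, and the collected definitions are rendered by PyWriter or
+# CPPWriter. A typical input entry looks roughly like this (illustrative
+# builder call; the real entries live in dnn/scripts/opr_param_defs.py):
+#
+#     pdef('Axis').add_fields('int32', 'axis', 0)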
+import argparse
+import collections
+import textwrap
+import os
+import hashlib
+import struct
+
+class member_defs:
+    """contains classes to define members of an opr param"""
+
+    Dtype = collections.namedtuple('Dtype', ['cname', 'pycvt', 'pyfmt',
+                                             'cppjson', 'cname_attr'])
+    Dtype.__new__.__defaults__ = ('', )
+    uint32 = Dtype('uint32_t', 'int', 'I', 'NumberInt')
+    uint64 = Dtype('uint64_t', 'int', 'Q', 'NumberInt',
+                   'alignas(sizeof(uint64_t)) ')
+    int32 = Dtype('int32_t', 'int', 'i', 'NumberInt')
+    float32 = Dtype('float', 'float', 'f', 'Number')
+    float64 = Dtype('double', 'float', 'd', 'Number')
+    dtype = Dtype('DTypeEnum', '_as_dtype_num', 'I', 'Number')
+    bool = Dtype('bool', 'bool', '?', 'Bool')
+
+    class Base:
+        pass
+
+
+    class Doc:
+        """wrap an identifier to associate documentation
+
+        note: if the doc starts with a linebreak, it would not be reformatted.
+        """
+        __slots__ = ['id', 'doc']
+
+        def __init__(self, id_, doc):
+            assert isinstance(id_, str) and isinstance(doc, str), (id_, doc)
+            self.id = id_
+            self.doc = doc
+
+        @property
+        def no_reformat(self):
+            """whether reformat is disallowed for this doc string"""
+            return self.doc.startswith('\n')
+
+        @property
+        def raw_lines(self):
+            """the doc lines when ``no_reformat`` is true"""
+            ret = self.doc.split('\n')
+            assert not ret[0]
+            return ret[1:]
+
+        @classmethod
+        def make(cls, v):
+            """make a doc object from a str or Doc"""
+            if isinstance(v, cls):
+                return v
+            assert isinstance(v, str)
+            return cls(v, '')
+
+        def __str__(self):
+            return self.id
+
+        def __eq__(self, rhs):
+            if isinstance(rhs, str):
+                return self.id == rhs
+            return (isinstance(rhs, Doc) and
+                    (self.id, self.doc) == (rhs.id, rhs.doc))
+
+
+    class Enum(Base):
+        """define an enum; the result would contain both an enum class def and its
+        corresponding data field
+
+        :param default: index of default member value
+
+        :attr name_field: name of the data field of this enum in the param
+            struct
+        :attr member_alias: list of (member, alias) pairs
+        """
+        __slots__ = ['name', 'name_field', 'members', 'default',
+                     'member_alias']
+
+        all_enums = {}
+        """(param_name, name) => enum"""
+
+        def __init__(self, param_name, name, name_field, members, default,
+                     member_alias):
+            name = member_defs.Doc.make(name)
+            assert name.id[0].isupper()
+            members = tuple(map(member_defs.Doc.make, members))
+            if isinstance(default, str):
+                if default not in members:
+                    raise ValueError(
+                        "Default value '{}' does not exist.".format(default))
+                default = members.index(default)
+            assert isinstance(default, int)
+            self.name = name
+            self.name_field = self.get_name_field(name.id, name_field)
+            self.members = members
+            self.default = default
+
+            self.all_enums[(param_name, name.id)] = self
+
+            assert isinstance(member_alias, list)
+            self.member_alias = member_alias
+
+        @classmethod
+        def get_name_field(cls, name, name_field):
+            if name_field is None:
+                name_field = name[0].lower() + name[1:]
+            assert isinstance(name_field, str)
+            return name_field
+
+    class Field(Base):
+        """define a normal data field"""
+        __slots__ = ['name', 'dtype', 'default']
+
+        def __init__(self, name, dtype, default):
+            assert isinstance(dtype, member_defs.Dtype)
+            self.name = member_defs.Doc.make(name)
+            self.dtype = dtype
+            self.default = default
+
+    class Const(Base):
+        """define a const data field"""
+        __slots__ = ['name', 'dtype', 'default']
+
+        def __init__(self, name, dtype, default):
+            assert isinstance(dtype, member_defs.Dtype)
+            self.name = member_defs.Doc.make(name)
+            self.dtype = dtype
+            self.default = default
+
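+    # A sketch of how these member classes end up instantiated by a param
+    # definition (values are hypothetical; real ones are produced by the
+    # ParamDef builder methods):
+    #
+    #     member_defs.Field('axis', member_defs.int32, 0)
+    #     member_defs.Enum('Elemwise', 'Mode', None,
+    #                      ['RELU', 'SIGMOID'], 0, [])
+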
+ class EnumAlias(Base): + """alias of enum type from another param""" + __slots__ = ['name', 'name_field', 'src_class', 'src_name', 'default'] + + def __init__(self, name, name_field, src_class, src_name, default): + self.name = name + self.name_field = member_defs.Enum.get_name_field(name, name_field) + self.src_class = src_class + if src_name is None: + src_name = name + self.src_name = src_name + self.default = default + + @property + def src_enum(self): + """source Enum class""" + return member_defs.Enum.all_enums[(self.src_class, self.src_name)] + + def get_default(self): + """get default index; fallback to src index if default is not + set""" + if self.default is None: + return self.src_enum.default + return self.default + + +class ParamDef: + """""" + __all_tags = set() + all_param_defs = [] + + __slots__ = ['name', 'members', 'tag', 'is_legacy'] + + def __init__(self, name, doc='', *, version=0, is_legacy=False): + self.members = [] + self.all_param_defs.append(self) + h = hashlib.sha256(name.encode('utf-8')) + if version: + h.update(struct.pack(' 0: + self._indent() + + +class PyWriter(IndentWriterBase): + + _static_members = None + _non_static_members = None + _enums = None + _enum_map = None + + def __call__(self, fout, defs): + super().__call__(fout) + self._enum_map = {} + self._write('// %s', self._get_header()) + self._write('#include "megbrain/imperative/opdef/all.h"') + self._write('') + self._write('using namespace mgb::imperative;') + self._write('') + self._process(defs) + + def _on_param_begin(self, p): + self._enums = [] + self._non_static_members = [] + self._static_members = [] + + def _reg_enum_single(self, cur_def, e): + alias = None + if isinstance(e, member_defs.Enum): + src = e + else: + assert isinstance(e, member_defs.EnumAlias) + src = e.src_enum + alias = e + + src_py_name = self._enum_map.get(src, None) + if src_py_name is not None: + py_name = '{}{}Enum'.format(cur_def, src.name if alias is None else alias.name) + self._write('m.attr("{}") = m.attr("{}");\n'.format(py_name, src_py_name)) + return + + if alias is None: + enum_name = str(src.name) + else: + enum_name = str(alias.name) + c_name = 'opdef::{}::{}'.format(cur_def, enum_name) + py_name = '{}{}Enum'.format(cur_def, enum_name) + self._write('py::enum_<{}>(m, "{}")'.format(c_name, py_name), indent=1) + for i in src.members: + self._write('.value("{0}", {1}::{0})'.format(i, c_name)) + self._write(';\n', indent=-1) + self._enum_map[src] = py_name + + def _on_param_end(self, p): + cur_def = '{}Def'.format(p.name) + for e in self._enums: + self._reg_enum_single(cur_def, e) + self._write('py::class_(m, "{0}")'.format(cur_def), indent=1) + # TODO: use ctor with given default value + self._write('.def(py::init<>())') + for i in self._static_members: + assert isinstance(i, member_defs.Const) + self._write('.def_property_readonly_static("{0}", []() {{ return opdef::{1}::{0}; }})'.format(i.name, cur_def)) + for i in self._non_static_members: + fname = None + if isinstance(i, member_defs.Field): + fname = i.name + else: + assert isinstance(i, (member_defs.Enum, member_defs.EnumAlias)) + fname = i.name_field + self._write('.def_readwrite("{0}", &opdef::{1}::{0})'.format(fname, cur_def)) + self._write(';\n', indent=-1) + + + def _on_member_enum(self, e,): + self._enums.append(e) + self._non_static_members.append(e) + + def _on_member_enum_alias(self, e): + self._enums.append(e) + self._non_static_members.append(e) + + def _on_member_field(self, f): + self._non_static_members.append(f) + + def 
_on_const_field(self, f): + self._static_members.append(f) + + +class CPPWriter(IndentWriterBase): + _param_namespace = 'opdef' + + _ctor_args = None + """list of (text in func param, var name); func param name must be var name + appended by an underscore""" + _non_static_members = None + + def __call__(self, fout, defs): + super().__call__(fout) + self._write('// %s', self._get_header()) + self._write('#pragma once') + self._write('#include "megdnn.h"') + # which defined in megbrain/tools/param_defs/mgb_opr_param_defs.py + self._write('#include "megbrain/opr/param_defs.h"') + self._write('#include ') + self._write('namespace mgb {') + self._write('namespace imperative {') + self._write('namespace %s {', self._param_namespace) + self._write('namespace {') + self._write('#include "megdnn/dtype.h"') + self._write('using DTypeEnum = megdnn::DTypeEnum;') + self._write('} // anonymous namespace') + self._process(defs) + self._write('} // namespace %s', self._param_namespace) + self._write('} // namespace imperative') + self._write('} // namespace mgb') + self._write('// vim: syntax=cpp.doxygen') + + def _on_param_begin(self, p): + self._write('struct %sDef {', p.name, indent=1) + self._ctor_args = [] + self._non_static_members = [] + + def _add_ctor_args(self, typename, default, varname): + self._ctor_args.append(( + '{} {}_={}'.format(typename, varname, default), + varname)) + + def _on_param_end(self, p): + ''' + MegDNN param structures are not packed and we need to initialize the structure + paddings to zero or it would break MegBrain hash system. We do memset(0) in default + ctor and use a trick, wrapping non-static members in a anonymous union which would + copy the object representation in its default copy/move ctor, for copy/move ctor. + > The implicitly-defined copy/move constructor for a non-union class X performs + > a memberwise copy/move of its bases and members. [class.copy.ctor 14] + > The implicitly-defined copy/move constructor for a union X copies the object + > representation (6.9) of X. 
[class.copy.ctor 15] + ''' + if self._non_static_members: + self._write('union { struct {') + for i in self._non_static_members: + if isinstance(i, member_defs.Field): + self._write('%s%s %s;', i.dtype.cname_attr, i.dtype.cname, i.name) + else: + assert isinstance(i, (member_defs.Enum, member_defs.EnumAlias)) + self._write('%s %s;', i.name, i.name_field) + self._write('}; };') + param_list = [] + if self._ctor_args: + pdefs, varnames = zip(*self._ctor_args) + self._write('%sDef(%s) {', p.name, ', '.join(pdefs), indent=1) + self._write('memset(this, 0, sizeof(*this));') + for var in varnames: + self._write('this->%s = %s_;', var, var) + param_list.append(str(var)) + self._write('}', indent=-1) + self._write('megdnn::param::%s param() {', self._cur_class, indent=1) + self._write('return {%s};', ','.join(param_list)) + self._write('}', indent=-1) + self._write('};\n', indent=-1) + + + def __on_member_enum(self, e, default_value): + self._write('using %s = megdnn::param::%s::%s;', e.name, self._cur_class, e.name) + self._non_static_members.append(e) + self._add_ctor_args(e.name, default_value, e.name_field) + + def _on_member_enum(self, e,): + self.__on_member_enum(e, '{}::{}'.format(e.name, e.members[e.default])) + + def _on_member_enum_alias(self, e): + self.__on_member_enum(e, '{}::{}'.format(e.name, e.src_enum.members[e.get_default()])) + + def _on_member_field(self, f): + self._non_static_members.append(f) + self._add_ctor_args(f.dtype.cname, f.default, f.name) + + def _on_const_field(self, f): + if 'int' in f.dtype.cname: + self._write('static constexpr %s%s %s = %s;', f.dtype.cname_attr, f.dtype.cname, f.name, f.default) + else: + self._write('static const %s%s %s = %s;', f.dtype.cname_attr, f.dtype.cname, f.name, f.default) + +def main(): + parser = argparse.ArgumentParser( + 'generate opr param defs from description file') + parser.add_argument('-t', '--type', choices=['c++', 'py'], default='c++', + help='output type') + parser.add_argument('input') + parser.add_argument('output') + args = parser.parse_args() + + with open(args.input) as fin: + inputs = fin.read() + exec(inputs, {'pdef': ParamDef, 'Doc': member_defs.Doc}) + input_hash = hashlib.sha256() + input_hash.update(inputs.encode(encoding='UTF-8')) + input_hash = input_hash.hexdigest() + + if args.type == 'py': + writer = PyWriter() + else: + writer = CPPWriter() + + with open(args.output, 'w') as fout: + writer.set_input_hash(input_hash)(fout, ParamDef.all_param_defs) + +if __name__ == '__main__': + main() diff --git a/imperative/python/tools/gen_ops.py b/imperative/python/tools/gen_ops.py new file mode 100755 index 0000000000000000000000000000000000000000..059efc6c925823e97fac68240ba5cf72ea771c2a --- /dev/null +++ b/imperative/python/tools/gen_ops.py @@ -0,0 +1,276 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
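+# This generator exec()s the *.oprdecl files with `decl_opr`/`decl_raw_opr`
+# bound to the Context methods below, then splices the generated classes into
+# ops.tpl.py. A declaration it consumes looks roughly like this (illustrative,
+# matching the decl_opr signature below; real declarations live in
+# src/**/*.oprdecl):
+#
+#     decl_opr('Dimshuffle',
+#              inputs=[Doc('src', 'input tensor')],
+#              params='Dimshuffle')
+#
+# Anything declared with a `body=` override is recorded in `skipped` and has
+# to be implemented by hand.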
+from io import StringIO +import re +import argparse +import subprocess +import os +import textwrap +import inspect + + +def camel2underscore( + name, *, + first_cap_re=re.compile('([A-Z])([A-Z][a-z]+)'), + all_cap_re = re.compile('([a-z])([A-Z]+)')): + if name.isupper(): + return name.lower() + s1 = first_cap_re.sub(r'\1_\2', name) + return all_cap_re.sub(r'\1_\2', s1).lower() + + +def caller_lineno(level=1): + f = inspect.stack()[level+1] + return '%s:%d' % (f.filename, f.lineno) + + +class Doc: + """wrap an identifier and doc""" + _id = None + + def __init__(self, id_, doc, typestr=None, default=None): + self._id = id_ + self.doc = doc + self.typestr = typestr + self.default = default + + def __str__(self): + return self._id + + +class Context: + fout = None + + def __init__(self): + self.fout = StringIO() + self.indent = 0 + self.generated = [] + self.skipped = [] + + def write(self, text, *fmt, indent=0): + text = textwrap.dedent(text) + text = textwrap.indent(text, ' '*4*(self.indent + indent)) + text = text % fmt + if not text.endswith('\n'): + text += '\n' + self.fout.write(text) + + def _gen_signature(self, params, *, have_config=True, + has_out_dtype=False): + sig = ['self', '*'] + + for i, _ in params: + sig.append('{}=None'.format(i)) + + if have_config: + sig.extend(['name=None', 'comp_node=None', 'config=None']) + if has_out_dtype: + sig.append('dtype=None') + + if params: + sig.append('**kwargs') + + if sig[-1] == '*': + sig.pop() + return ', '.join(sig) + + def _write_canonize_inputs(self, inputs, convert_inputs, + convert_inputs_args=None, + has_out_dtype=False): + self._write_gen_config(has_out_dtype) + inputs = list(map(str, inputs)) + if convert_inputs_args is None: + if inputs[0][0] == '*': + arg = inputs[0][1:] + else: + arg = '[{}]'.format(', '.join(inputs)) + else: + arg = convert_inputs_args + self.write('inputs = helper.%s(%s, config=config)', + convert_inputs, arg) + + def _write_gen_config(self, has_out_dtype=False): + self.write('''\ + config = config or Config() + if name: + config.name = name + if comp_node: + config.comp_node = comp_node + ''') + if has_out_dtype: + self.write('''\ + if dtype: + config.dtype = dtype + ''') + self.write('self.config = config') + + def _write_make_params(self, params): + for pname, ptype in params: + self.write('self.%s = helper.make_param(%s, param_defs.%s, kwargs)', + pname, pname, ptype) + self.write('assert not kwargs, "extra kwargs: {}".format(kwargs)') + + def _write_doc(self, inputs, params, desc): + self.write('"""') + if isinstance(desc, Doc): + assert desc._id is None + self.write(desc.doc) + elif desc: + for i in textwrap.wrap(desc, 75): + self.write(i) + + self.write('') + for i in inputs: + name = str(i) + typestr = ':class:`.Tensor`' + if name[0] == '*': + name = name[1:] + typestr = 'list of ' + typestr + if isinstance(i, Doc): + self.write(':param %s: %s', name, i.doc) + if i.typestr is not None: + typestr = i.typestr + if typestr: + if not isinstance(i, Doc): + self.write(':param %s: ', name) + self.write(':type %s: %s', name, typestr) + + for pname, ptype in params: + self.write(':param %s: ', pname) + self.write(':type %s: :class:`~megbrain.opr_param_defs.%s`', + pname, ptype) + + self.write(':param comp_node: see doc for *config*') + self.write(':param name: see doc for *config*') + self.write( + ':param config: give a :class:`.OperatorNodeConfig` object to set ' + 'operator name and comp node. 
This can also be achieved by passing ' + '*comp_node* and *name* separately.') + + self.write('"""') + + def _write_return(self, name, outputs): + self.write('opdef = helper.PodOpVisitor("%s", config, params)', name) + self.write('outputs = helper.create_op(opdef, inputs)') + if outputs: + self.write('outputs = [outputs[i] for i in %s]', + list(map(int, outputs))) + self.write('return helper.convert_outputs(outputs)') + + def decl_opr(self, name, *, inputs, params, desc=None, pyname=None, + canonize_input_vars=None, + canonize_input_vars_args=None, body=None, + outputs=None, version=0, has_out_dtype=False): + """ + :param inputs: name of variable inputs; a name starting with `*' means + a list of vars + :type inputs: list of str + :param params: (param name, param type) pairs; it can be a single + string representing the param type, and param name defaults to + 'param' + :type params: list of pair of str, or str + :param pyname: python function name + :param body: extra statements to be placed before calling _create_opr + :param outputs: the indices of output vars to be selected from raw opr + result + """ + if body: + self.skipped.append(name) + return + + body = body or [] + if isinstance(params, str): + params = [('param', params)] + assert params + + self.write('# %s', caller_lineno()) + self.write('class %s(PodOpVisitor):', name) + self.indent += 1 + + param_names, _ = zip(*params) + self.write('param_names = (%s,)', ', '.join(map('"{}"'.format, param_names))) + self.write('name = "%s"', '{}V{}'.format(name, version) if version else name) + self.write('\n') + + self.write('def __init__(%s):', + self._gen_signature(params, + has_out_dtype=has_out_dtype)) + self.indent += 1 + + self._write_gen_config(has_out_dtype=has_out_dtype) + self.write('\n') + + self._write_make_params(params) + + self.write('\n') + self.indent -= 2 + + self.generated.append(name) + + def decl_raw_opr(self, name, *, inputs, inputs_cvt=[], body=None, + desc=None, local_defs=[], have_config=True): + self.skipped.append(name) + + def get_str(self): + return self.fout.getvalue() + + def all_list(self): + buf = StringIO() + print( + '[', + *(' "%s",' % i for i in self.generated), + ']', + sep='\n', + file=buf + ) + return buf.getvalue() + + +def main(): + parser = argparse.ArgumentParser( + description='generate operator function def code from decl file') + parser.add_argument('inputs', nargs='+') + parser.add_argument('--output', '-o') + args = parser.parse_args() + + gen = Context() + exec_globals = { + 'decl_opr': gen.decl_opr, + 'decl_raw_opr': gen.decl_raw_opr, + 'Doc': Doc, + 'camel2underscore': camel2underscore, + } + for i in args.inputs: + print('generate ops from {}'.format(i)) + with open(i) as fin: + exec(compile(fin.read(), i, 'exec'), exec_globals) + + try: + git_commit = subprocess.check_output( + ['git', 'rev-parse', 'HEAD'], universal_newlines=True, + cwd=os.path.dirname(os.path.realpath(__file__))).strip() + except: + git_commit = 'NOT_A_GIT_REPO' + + def relpath(*args): + d = os.path.dirname(__file__) + return os.path.join(d, *args) + + with open(relpath('ops.tpl.py')) as fin: + with open(args.output, 'w') as fout: + fout.write(fin.read() + .replace('{%all%}', gen.all_list()) + .replace('{%body%}', gen.get_str()) + .replace('{%git_commit%}', git_commit)) + + print('Skipped:') + print(*gen.skipped, sep='\n') + +if __name__ == '__main__': + main() diff --git a/imperative/python/tools/ops.tpl.py b/imperative/python/tools/ops.tpl.py new file mode 100644 index 
0000000000000000000000000000000000000000..f91004b1f732886623952ea4629be2942af1ff8d
--- /dev/null
+++ b/imperative/python/tools/ops.tpl.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+"""This python module contains functions to apply the operators defined by
+megbrain.
+
+.. note::
+    Most of the functions are automatically generated. Their signatures
+    contain a ``param`` argument (or more than one argument, such as
+    :func:`convolution`, which has ``param`` and ``execution_polity``), and
+    they also accept keyword arguments. Such a function can be called either
+    by providing a param object of the appropriate type, or by passing the
+    arguments needed by the param object's constructor as keyword arguments.
+    Furthermore, for a param that needs an enumeration member, the enum name
+    can be used to refer to the enum object.
+
+    For example, the following statements are equivalent::
+
+        elemwise([a, b], mode='max')
+        elemwise([a, b], mode=opr_param_defs.Elemwise.Mode.MAX)
+        elemwise([a, b], param=opr_param_defs.Elemwise('max'))
+"""
+
+__git_commit__ = "{%git_commit%}"
+
+import collections
+
+from . import helper
+from .helper import PodOpVisitor
+from . import param_defs
+from ..._imperative_rt import OperatorNodeConfig as Config
+
+__all__ = {%all%}
+
+{%body%}
diff --git a/imperative/src/impl/blob_manager_impl.cpp b/imperative/src/impl/blob_manager_impl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3fa3c5316c54f1bfd734cc07460ce445ba265866
--- /dev/null
+++ b/imperative/src/impl/blob_manager_impl.cpp
@@ -0,0 +1,162 @@
+/**
+ * \file imperative/src/impl/blob_manager_impl.cpp
+ *
+ * This file is part of MegBrain, a deep learning framework developed by Megvii.
+ *
+ * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ *
+ */
+
+#include "./blob_manager_impl.h"
+#include "megbrain/utils/arith_helper.h"
+#include
+
+namespace mgb {
+namespace imperative {
+
+BlobManagerImpl::BlobData::BlobData(Blob* in_blob) {
+    blob = in_blob;
+    DeviceTensorStorage d_storage;
+    d_storage.reset(blob->m_comp_node, blob->m_size, blob->m_storage);
+
+    h_storage = HostTensorStorage(blob->m_comp_node);
+
+    h_storage.ensure_size(blob->m_size);
+
+    h_storage.copy_from(const_cast(d_storage), blob->m_size);
+}
+
+void BlobManagerImpl::register_blob(Blob* blob) {
+    // add the blob to the comp2blobs map
+    MGB_LOCK_GUARD(m_mtx);
+    mgb_assert(m_comp2blobs_map[blob->m_comp_node].insert(blob));
+}
+
+void BlobManagerImpl::unregister_blob(Blob* blob) {
+    // erase the blob from the comp2blobs map
+    MGB_LOCK_GUARD(m_mtx);
+    mgb_assert(1 == m_comp2blobs_map[blob->m_comp_node].erase(blob));
+}
+
+void BlobManagerImpl::alloc_with_defrag(Blob* blob, size_t size) {
+    if (!m_enable) {
+        alloc_direct(blob, size);
+    } else {
+        // // debug
+        // defrag(blob->m_comp_node);
+        // alloc_direct(blob, storage, size);
+
+        // try to alloc
+        MGB_TRY { alloc_direct(blob, size); }
+        // if that fails, defrag and alloc again
+        MGB_CATCH(MemAllocError&, {
+            mgb_log_warn("memory allocation failed for blob; try defragmenting");
+            defrag(blob->m_comp_node);
+            alloc_direct(blob, size);
+        });
+    }
+}
+
+
+void BlobManagerImpl::alloc_direct(Blob* blob, size_t size) {
+    DeviceTensorStorage storage(blob->m_comp_node);
+    mgb_assert(blob->m_comp_node.valid());
+    storage.ensure_size(size);
+    blob->m_storage = storage.raw_storage();
+}
+
+void BlobManagerImpl::defrag(const CompNode& cn) {
+    BlobSetWithMux* blobs_set_ptr;
+    {
+        MGB_LOCK_GUARD(m_mtx);
+        blobs_set_ptr = &m_comp2blobs_map[cn];
+    }
+    MGB_LOCK_GUARD(blobs_set_ptr->mtx);
+    std::vector blob_data_array;
+    std::set storage_set;
+
+    auto alignment = cn.get_mem_addr_alignment();
+    size_t tot_sz = 0;
+
+    // copy each blob to HostTensorStorage, then release its device storage
+    for (auto i : blobs_set_ptr->blobs_set) {
+        // skip if the blob does not have m_storage
+        if (!i->m_storage) continue;
+
+        // skip if use_count() > 1
+        if (i->m_storage.use_count() > 1) continue;
+
+        // two blobs can't share the same storage
+        mgb_assert(storage_set.insert(i->m_storage).second);
+
+        tot_sz += get_aligned_power2(i->m_size, alignment);
+        BlobData blob_data(i);
+        blob_data_array.push_back(blob_data);
+        i->m_storage.reset();
+    }
+    // clear all, to make sure m_storage gets released
+    storage_set.clear();
+
+    // skip if there is no blob to defrag
+    if (!blob_data_array.size()) return;
+
+    // wait for all other comp nodes to avoid a moved var being read; note that
+    // ExecEnv has been paused, so no new task would be dispatched
+    CompNode::sync_all();
+    CompNode::try_coalesce_all_free_memory();
+
+    // try to free all
+    MGB_TRY { cn.free_device(cn.alloc_device(tot_sz)); }
+    MGB_CATCH(MemAllocError&, {})
+
+    // allocate for each storage
+    for (auto i : blob_data_array) {
+        DeviceTensorStorage d_storage = DeviceTensorStorage(cn);
+        d_storage.ensure_size(i.blob->m_size);
+        d_storage.copy_from(i.h_storage, i.blob->m_size);
+        i.blob->m_storage = d_storage.raw_storage();
+    }
+
+    // wait for the copies to finish before destructing host values
+    cn.sync();
+}
+
+void BlobManagerImpl::set_enable(bool flag) {
+    m_enable = flag;
+}
+
+struct BlobManagerStub : BlobManager {
+    void alloc_with_defrag(Blob* blob, size_t size) {
+        mgb_assert(0, "prohibited after global variable destruction");
+    };
+    void register_blob(Blob* blob) {
+        mgb_assert(0, "prohibited after global variable destruction");
+    };
+    void
unregister_blob(Blob* blob) {}; + void set_enable(bool flag) { + mgb_assert(0, "prohibited after global variable destruction"); + }; + void defrag(const CompNode& cn) { + mgb_assert(0, "prohibited after global variable destruction"); + }; +}; + +BlobManager* BlobManager::inst() { + static std::aligned_union_t<0, BlobManagerImpl, BlobManagerStub> storage; + + struct Keeper { + Keeper() { + new(&storage) BlobManagerImpl(); + } + ~Keeper() { + reinterpret_cast(&storage)->~BlobManager(); + new(&storage) BlobManagerStub(); + } + }; + static Keeper _; + + return reinterpret_cast(&storage); +} + +} // namespace imperative +} // namespace mgb diff --git a/imperative/src/impl/blob_manager_impl.h b/imperative/src/impl/blob_manager_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..32ee2879be734a3634166e51ac53d8411d45b38d --- /dev/null +++ b/imperative/src/impl/blob_manager_impl.h @@ -0,0 +1,59 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/blob_manager.h" + +namespace mgb { +namespace imperative { + +class BlobManagerImpl final: public BlobManager { + + struct BlobSetWithMux { + std::mutex mtx; + ThinHashSet blobs_set; + bool insert(Blob* blob) { + MGB_LOCK_GUARD(mtx); + return blobs_set.insert(blob).second; + } + size_t erase(Blob* blob) { + MGB_LOCK_GUARD(mtx); + return blobs_set.erase(blob); + } + }; + + struct BlobData { + Blob* blob; + HostTensorStorage h_storage; + BlobData(Blob* in_blob); + }; + + std::mutex m_mtx; + CompNode::UnorderedMap m_comp2blobs_map; + bool m_enable; + + void defrag(const CompNode& cn) override; + + void alloc_direct(Blob* blob, size_t size); + +public: + static BlobManager* inst(); + + void alloc_with_defrag(Blob* blob, size_t size) override; + + void register_blob(Blob* blob) override; + + void unregister_blob(Blob* blob) override; + + void set_enable(bool flag) override; +}; + +} // namespace imperative +} // namespace mgb diff --git a/imperative/src/impl/dnn_op_helper.h b/imperative/src/impl/dnn_op_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..17017854d98358c59e263febb0b3c9a68a5b5638 --- /dev/null +++ b/imperative/src/impl/dnn_op_helper.h @@ -0,0 +1,54 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include "megbrain/comp_node_env.h" +#include "megbrain/comp_node.h" + +using namespace megdnn; + +namespace mgb { +namespace imperative { + +/*! 
+ * \brief A struct for safely calling DNN oprs + * In some cases, op may be released before the complete of the execution + * This destructor will prevent this + */ +template +struct DnnOprCaller { + CompNode cn; + DeviceTensorND dev_tensor; + Workspace workspace; + std::unique_ptr op; + + DnnOprCaller(CompNode cn): cn(cn) { + auto&& handle = MegDNNHandle::get( + CompNodeEnv::from_comp_node(cn)).handle(); + op = handle->create_operator(); + } + + megdnn::Workspace create_workspace(TensorLayout layout) { + dev_tensor = Tensor::make(layout, cn)->dev_tensor(); + workspace = megdnn::Workspace(dev_tensor.raw_ptr(), + dev_tensor.storage().size()); + return workspace; + } + + ~DnnOprCaller() { + using DT = CompNode::DeviceType; + if (cn.device_type() == DT::CPU && cn != CompNode::default_cpu()) { + CompNodeEnv::from_comp_node(cn).cpu_env().dispatch( + [p = op.release()] { delete p; } + ); + } + } +}; + +} // namespace imperative +} // namespace mgb \ No newline at end of file diff --git a/imperative/src/impl/interpreter_impl.cpp b/imperative/src/impl/interpreter_impl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..297976a8179f1fac808f141d22ea7b800bc6bd13 --- /dev/null +++ b/imperative/src/impl/interpreter_impl.cpp @@ -0,0 +1,213 @@ +#include "./interpreter_impl.h" + + +using namespace mgb; +using namespace imperative; +using namespace interpreter; +using namespace interpreter::intl; + + +std::unique_ptr InterpreterImpl::create_channel() { + return std::make_unique(); +} + +Interpreter& Interpreter::inst() { + static InterpreterImpl inst_; + return inst_; +} + +void* ChannelImpl::put(const HostTensorND& value) { + auto info = alloc(); + info->desc.layout = value.layout(); + info->desc.comp_node = value.comp_node(); + info->desc.value = value.proxy_to_default_cpu(); + m_valid_handle.insert(info); + m_worker.add_task(Put{info, value}); + return info; +} + +void ChannelImpl::del(void* handle) { + mgb_assert(m_valid_handle.erase(handle), "invalid handle: %p", handle); + m_worker.add_task(Del{reinterpret_cast(handle)}); +} + +SmallVector ChannelImpl::apply_op( + std::shared_ptr op, + const SmallVector& inputs) { + SmallVector input_descs; + input_descs.reserve(inputs.size()); + for (auto h : inputs) { + auto info = reinterpret_cast(h); + input_descs.push_back(info->desc); + } + auto output_descs = OpDef::infer_output_attrs_fallible(*op, input_descs); + ApplyOp cmd{std::move(op)}; + cmd.inputs.reserve(inputs.size()); + for (auto i : inputs) { + cmd.inputs.push_back(reinterpret_cast(i)); + } + cmd.outputs.reserve(output_descs.size()); + SmallVector outputs; + for (auto&& desc : output_descs) { + auto info = alloc(); + info->desc = desc; + m_valid_handle.insert(info); + cmd.outputs.push_back(info); + outputs.push_back(info); + } + m_worker.add_task(std::move(cmd)); + return outputs; +} + +HostTensorND ChannelImpl::get_value(void* handle) { + mgb_assert(m_valid_handle.find(handle) != m_valid_handle.end(), + "invalid handle: %p", handle); + auto info = reinterpret_cast(handle); + std::unique_lock lock(m_mutex); + mgb_assert(!m_waitee); + if (!info->value_fetched) { + m_waitee = info; + m_worker.add_task(GetValue{info}); + m_cv.wait(lock, [&]() { + check_worker_exc_unsafe(); + return info->value_fetched; + }); + m_waitee = nullptr; + } + mgb_assert(info->ptr->value_fetched()); + return info->ptr->get_value(); +} + +TensorShape ChannelImpl::get_shape(void* handle) { + mgb_assert(m_valid_handle.find(handle) != m_valid_handle.end(), + "invalid handle: %p", handle); + auto info = 
reinterpret_cast(handle); + if (info->desc.layout.ndim != 0) { + return info->desc.layout; + } + std::unique_lock lock(m_mutex); + mgb_assert(!m_waitee); + m_waitee = info; + m_cv.wait(lock, [&]() { + check_worker_exc_unsafe(); + return bool(info->ptr); + }); + m_waitee = nullptr; + TensorShape ret = info->ptr->layout(); + mgb_assert(ret.ndim != 0); + return ret; +} + +DType ChannelImpl::get_dtype(void* handle) { + mgb_assert(m_valid_handle.find(handle) != m_valid_handle.end(), + "invalid handle: %p", handle); + auto info = reinterpret_cast(handle); + auto ret = info->desc.layout.dtype; + mgb_assert(ret.valid()); + return ret; +} + +CompNode ChannelImpl::get_device(void* handle) { + mgb_assert(m_valid_handle.find(handle) != m_valid_handle.end(), + "invalid handle: %p", handle); + auto info = reinterpret_cast(handle); + auto ret = info->desc.comp_node; + mgb_assert(ret.valid()); + return ret; +} + +DeviceTensorND ChannelImpl::get_dev_tensor(void* handle) { + mgb_assert(m_valid_handle.find(handle) != m_valid_handle.end(), + "invalid handle: %p", handle); + auto info = reinterpret_cast(handle); + std::unique_lock lock(m_mutex); + mgb_assert(!m_waitee); + m_waitee = info; + m_cv.wait(lock, [&]() { + check_worker_exc_unsafe(); + return bool(info->ptr); + }); + m_waitee = nullptr; + return info->ptr->dev_tensor(); +} + +void ChannelImpl::sync() { + m_worker.wait_all_task_finish(); + MGB_LOCK_GUARD(m_mutex); + check_worker_exc_unsafe(); +} + +void ChannelImpl::close() { + sync(); +} + +void ChannelImpl::config_async_level(int level) { + mgb_assert(0); +} + +TensorInfo* ChannelImpl::alloc() { + MGB_LOCK_GUARD(m_mutex); + return m_pool.alloc(); +} + +void ChannelImpl::free(TensorInfo* ptr) { + MGB_LOCK_GUARD(m_mutex); + m_pool.free(ptr); +} + +ChannelImpl::~ChannelImpl() {} + +void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr) { + MGB_LOCK_GUARD(m_mutex); + dest->value_fetched = ptr->value_fetched(); + dest->ptr = std::move(ptr); + if (m_waitee == dest) { + m_cv.notify_all(); + } +} + +void ChannelImpl::process_one_task(Command& cmd) { + std::visit([this](auto& cmd) { + using T = std::remove_reference_t; + try { + if constexpr (std::is_same_v) { + produce_tensor(cmd.dest, Tensor::make(cmd.value)); + } else if constexpr (std::is_same_v) { + SmallVector tensor_inputs; + tensor_inputs.reserve(cmd.inputs.size()); + for (auto i : cmd.inputs) { + tensor_inputs.push_back(i->ptr); + } + auto tensor_outputs = OpDef::apply_on_physical_tensor(*cmd.op, tensor_inputs); + mgb_assert(tensor_outputs.size() == cmd.outputs.size()); + for (size_t i = 0; i < tensor_outputs.size(); ++i) { + produce_tensor(cmd.outputs[i], std::move(tensor_outputs[i])); + } + } else if constexpr (std::is_same_v) { + free(cmd.dest); + } else if constexpr (std::is_same_v) { + cmd.dest->ptr->fetch_value(); + MGB_LOCK_GUARD(m_mutex); + cmd.dest->value_fetched = true; + if (m_waitee == cmd.dest) { + m_cv.notify_all(); + } + } else { + static_assert(!std::is_same_v); + } + } catch (...) 
{ + MGB_LOCK_GUARD(m_mutex); + m_worker_exc = std::current_exception(); + m_cv.notify_all(); + } + }, cmd); +} + + +void ChannelImpl::check_worker_exc_unsafe() { + if (m_worker_exc) { + std::exception_ptr exc; + std::swap(exc, m_worker_exc); + std::rethrow_exception(exc); + } +} diff --git a/imperative/src/impl/interpreter_impl.h b/imperative/src/impl/interpreter_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..fae219958d5fddc40f62eff026dce688bcad562b --- /dev/null +++ b/imperative/src/impl/interpreter_impl.h @@ -0,0 +1,95 @@ +#include +#include + +#include "megbrain/utils/mempool.h" +#include "megbrain/imperative/interpreter.h" + + +namespace mgb::imperative::interpreter::intl { + +using Handle = Interpreter::Handle; + +struct InterpreterImpl : Interpreter { + std::unique_ptr create_channel() override; +}; + +struct TensorInfo { + TensorPtr ptr; + LogicalTensorDesc desc; + bool value_fetched = false; +}; + +struct Put { + TensorInfo* dest; + HostTensorND value; +}; +struct ApplyOp { + std::shared_ptr op; + SmallVector inputs; + SmallVector outputs; +}; +struct Del { + TensorInfo* dest; +}; +struct GetValue { + TensorInfo* dest; +}; +using Command = std::variant; + +struct ChannelImpl : Interpreter::Channel { + ChannelImpl() : m_worker(this) {} + ~ChannelImpl() override; + + Handle put(const HostTensorND& value) override; + + void del(Handle) override; + + SmallVector apply_op( + std::shared_ptr op, + const SmallVector& inputs) override; + + HostTensorND get_value(Handle) override; + TensorShape get_shape(Handle) override; + DType get_dtype(Handle) override; + CompNode get_device(Handle) override; + + DeviceTensorND get_dev_tensor(Handle) override; + + void sync() override; + void close() override; + + void config_async_level(int level) override; + +private: + TensorInfo* alloc(); + void free(TensorInfo*); + + void process_one_task(Command&); + + void check_worker_exc_unsafe(); + + void produce_tensor(TensorInfo* dest, TensorPtr ptr); + + std::mutex m_mutex; + std::condition_variable m_cv; + MemPool m_pool; + std::unordered_set m_valid_handle; + TensorInfo* m_waitee = nullptr; + std::exception_ptr m_worker_exc; + + struct WorkQueue : AsyncQueueSC { + WorkQueue(ChannelImpl* owner) : m_owner(owner) {} + void process_one_task(Command& cmd) { + m_owner->process_one_task(cmd); + } + private: + ChannelImpl* m_owner; + } m_worker; + + int m_async_level = 2; +}; + +} // namespace mgb::imperative::interpreter::intl diff --git a/imperative/src/impl/op_def.cpp b/imperative/src/impl/op_def.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cd1d1c39d6bd67ea8d486fc41af143f6a977e168 --- /dev/null +++ b/imperative/src/impl/op_def.cpp @@ -0,0 +1,82 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#include "megbrain/imperative/op_def.h" +#include "megbrain/imperative/ops/opr_attr.h" + +#include "./op_trait.h" + +namespace mgb { +namespace imperative { + +std::shared_ptr OpDef::make_from_op_node( + cg::OperatorNodeBase* node) { + OpTrait* trait; + trait = OpTrait::find_by_typeinfo(node->dyn_typeinfo()); + if (!trait) { + // TODO: register `make_from_op_node` for each OperatorNode + // instead of forwarding to OprAttr + trait = OpTrait::find_by_typeinfo(OprAttr::typeinfo()); + } + mgb_assert(trait); + return trait->make_from_op_node(node); +} + +SmallVector OpDef::apply_on_physical_tensor( + const OpDef& def, + const SmallVector& inputs) { + return def.trait()->apply_on_physical_tensor(def, inputs); +} + +void OpDef::exec( + const OpDef& def, + const SmallVector& inputs, + const SmallVector& outputs) { + def.trait()->exec(def, inputs, outputs); +} + +cg::OperatorNodeBase* OpDef::apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + return def.trait()->apply_on_var_node(def, inputs); +} + +SmallVector OpDef::infer_output_attrs_fallible( + const OpDef& def, + const SmallVector& inputs) { + return def.trait()->infer_output_attrs_fallible(def, inputs); +} + +SmallVector OpDef::infer_output_attrs( + const OpDef& def, + const SmallVector& inputs) { + return def.trait()->infer_output_attrs(def, inputs); +} + +BackwardGraphResult OpDef::make_backward_graph( + const OpDef& def, + const SmallVector& inputs, + const SmallVector& input_requires_grad, + const SmallVector& output_has_grad) { + return def.trait()->make_backward_graph(def, inputs, input_requires_grad, output_has_grad); +} + +const OpTrait* OpDef::trait() const { + if (!m_trait) { + m_trait = OpTrait::find_by_typeinfo(dyn_typeinfo()); + mgb_throw_if(!m_trait, MegBrainError, + "can not find op_trait by %s", dyn_typeinfo()->name); + } + return m_trait; +} + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/op_trait.cpp b/imperative/src/impl/op_trait.cpp new file mode 100644 index 0000000000000000000000000000000000000000..06163296a0bdc24cec7a9a1dcad9421b5dd42864 --- /dev/null +++ b/imperative/src/impl/op_trait.cpp @@ -0,0 +1,160 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include + +#include "megbrain/imperative/ops/opr_attr.h" + +#include "./op_trait.h" +#include "./proxy_graph_detail.h" + +namespace mgb { +namespace imperative { + +namespace detail { + +struct StaticData { + std::list registries; + std::unordered_map name2reg; + std::unordered_map type2reg; +}; + +// use "Construct On First Use" to prevent "static initialization order fiasco" +// (i.e., ensure global registry was initialized before calling opr registration) +StaticData& static_data() { + static StaticData data; + return data; +} + +template +struct __not_implementation__; + +template +struct __not_implementation__ { + static RType raise(Args ...) 
+        mgb_throw(MegBrainError, "Not Implemented");
+    }
+};
+
+} // namespace detail
+
+OpTrait::OpTrait(const char* name_): name(name_) {}
+
+OpTrait* OpTrait::find_by_typeinfo(Typeinfo* type) {
+    auto&& type2reg = detail::static_data().type2reg;
+    auto iter = type2reg.find(type);
+    if (iter == type2reg.end()) {
+        return nullptr;
+    }
+    return iter->second;
+}
+
+OpTrait* OpTrait::find_by_name(const char* name) {
+    auto&& name2reg = detail::static_data().name2reg;
+    auto iter = name2reg.find(name);
+    if (iter == name2reg.end()) {
+        return nullptr;
+    }
+    return iter->second;
+}
+
+void OpTrait::for_each_trait(thin_function<void(OpTrait&)> visitor){
+    for(auto& trait: detail::static_data().registries){
+        visitor(trait);
+    }
+}
+
+OpTraitRegistry& OpTraitRegistry::finalize() {
+    std::ostringstream msg;
+    #define CHECK(field) if (!trait->field) { \
+        msg << ", " #field; \
+        trait->field = \
+            detail::__not_implementation__<decltype(OpDef::field)>::raise; \
+    }
+    CHECK(make_from_op_node);
+    CHECK(apply_on_physical_tensor);
+    CHECK(exec);
+    CHECK(apply_on_var_node);
+    CHECK(infer_output_attrs_fallible);
+    CHECK(infer_output_attrs);
+    CHECK(make_backward_graph);
+    #undef CHECK
+    if (msg.tellp() > 0) {
+        mgb_log_warn(
+                "%s op trait missing: %s",
+                trait->name ? trait->name : "(anonymous)",
+                msg.str().c_str() + 2 /* skip first ", " */);
+    }
+    return *this;
+}
+
+SmallVector<TensorPtr> fallback_apply_on_physical_tensor(
+        const OpDef& def,
+        const SmallVector<TensorPtr>& inputs) {
+    auto desc = OpDef::infer_output_attrs(def, inputs);
+    SmallVector<TensorPtr> outputs;
+    for (auto&& i : desc) {
+        outputs.push_back(Tensor::make(i.layout, i.comp_node));
+    }
+    OpDef::exec(def, inputs, outputs);
+    return outputs;
+}
+
+SmallVector<LogicalTensorDesc> fallback_infer_output_attrs(const OpDef& def,
+        const SmallVector<TensorPtr>& inputs){
+    SmallVector<LogicalTensorDesc> input_descs;
+    for(auto&& input: inputs){
+        input_descs.push_back({input->layout(), input->comp_node()});
+    }
+    return input_descs;
+}
+
+OpTraitRegistry& OpTraitRegistry::fallback() {
+    if (!trait->exec && trait->apply_on_var_node) {
+        trait->exec = proxy_graph_detail::exec;
+    }
+    if (!trait->infer_output_attrs && trait->apply_on_var_node) {
+        trait->infer_output_attrs = proxy_graph_detail::infer_output_attrs;
+    }
+    if (!trait->infer_output_attrs_fallible && trait->apply_on_var_node) {
+        trait->infer_output_attrs_fallible = proxy_graph_detail::infer_output_attrs_fallible;
+    }
+    if (!trait->make_backward_graph && trait->apply_on_var_node) {
+        trait->make_backward_graph = proxy_graph_detail::make_backward_graph;
+    }
+    if (!trait->apply_on_physical_tensor && trait->infer_output_attrs && trait->exec) {
+        trait->apply_on_physical_tensor = fallback_apply_on_physical_tensor;
+    }
+    if(!trait->infer_output_attrs && trait->infer_output_attrs_fallible){
+        trait->infer_output_attrs = fallback_infer_output_attrs;
+    }
+    return *this;
+}
+
+void OpTraitRegistry::do_insert(Typeinfo* type) {
+    auto&& sd = detail::static_data();
+    mgb_assert(sd.type2reg.emplace(type, trait).second);
+}
+
+OpTraitRegistry OpTraitRegistry::do_insert(const char* name) {
+    auto&& sd = detail::static_data();
+    if (name) {
+        mgb_assert(!sd.name2reg.count(name),
+            "duplicated opr trait %s", name);
+    }
+    sd.registries.emplace_back(name);
+    auto ret = &sd.registries.back();
+    sd.name2reg.emplace(name, ret);
+    return {ret};
+}
+
+} // namespace imperative
+} // namespace mgb
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/imperative/src/impl/op_trait.h b/imperative/src/impl/op_trait.h
new file mode 100644
index 
0000000000000000000000000000000000000000..bf92bab2d2dc4e2258d7850ce4ee9eb954739973 --- /dev/null +++ b/imperative/src/impl/op_trait.h @@ -0,0 +1,119 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb { +namespace imperative { + +using OpDefMaker = thin_function< + decltype(OpDef::make_from_op_node)>; +using ApplyOnPhysicalTensor = thin_function< + decltype(OpDef::apply_on_physical_tensor)>; +using PhysicalTensorExecutor = thin_function< + decltype(OpDef::exec)>; +using ApplyOnVarNode = thin_function< + decltype(OpDef::apply_on_var_node)>; +using InferOutputAttrsFallible = thin_function< + decltype(OpDef::infer_output_attrs_fallible)>; +using InferOutputAttrs = thin_function< + decltype(OpDef::infer_output_attrs)>; +using GradMaker = thin_function< + decltype(OpDef::make_backward_graph)>; + +struct OpTrait { + const char* name; + OpDefMaker make_from_op_node; + ApplyOnPhysicalTensor apply_on_physical_tensor; + PhysicalTensorExecutor exec; + ApplyOnVarNode apply_on_var_node; + InferOutputAttrsFallible infer_output_attrs_fallible; + InferOutputAttrs infer_output_attrs; + GradMaker make_backward_graph; + OpTrait(const char* name); + static OpTrait* find_by_name(const char* name); + static OpTrait* find_by_typeinfo(Typeinfo* type); + static void for_each_trait(thin_function visitor); +}; + +struct OpTraitRegistry { + OpTrait* trait; + OpTraitRegistry& make_from_op_node(OpDefMaker f) { + trait->make_from_op_node = f; + return *this; + } + OpTraitRegistry& apply_on_physical_tensor(ApplyOnPhysicalTensor f) { + trait->apply_on_physical_tensor = f; + return *this; + } + OpTraitRegistry& physical_tensor_executor(PhysicalTensorExecutor f) { + trait->exec = f; + return *this; + } + OpTraitRegistry& apply_on_var_node(ApplyOnVarNode f) { + trait->apply_on_var_node = f; + return *this; + } + OpTraitRegistry& infer_output_attrs_fallible(InferOutputAttrsFallible f) { + trait->infer_output_attrs_fallible = f; + return *this; + } + OpTraitRegistry& infer_output_attrs(InferOutputAttrs f) { + trait->infer_output_attrs = f; + return *this; + } + OpTraitRegistry& grad_maker(GradMaker f) { + trait->make_backward_graph = f; + return *this; + } + OpTraitRegistry& fallback(); + OpTraitRegistry& finalize(); + + template + void insert() { + do_insert(T::typeinfo()); + } + + template + void insert() { + insert(); + insert(); + } + + template + static OpTraitRegistry insert(const char* name) { + auto&& ret = do_insert(name); + ret.insert(); + return ret; + } + + void do_insert(Typeinfo* type); + + static OpTraitRegistry do_insert(const char* name); +}; + +namespace detail { +struct _RegisterHelper { + OpTraitRegistry registry; + ~_RegisterHelper() { + registry.finalize(); + } +}; +} // namespace detail + +} // namespace imperative +} // namespace mgb + +#define OP_TRAIT_REG(name, ...) 
\ + static OpTraitRegistry __##name##_global_registry__ = \ + detail::_RegisterHelper{OpTraitRegistry::insert<__VA_ARGS__>(#name)}.registry + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/opr_utility.cpp b/imperative/src/impl/opr_utility.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a39f65669f6a80d0033c7e9419f3b8b77ba33e9c --- /dev/null +++ b/imperative/src/impl/opr_utility.cpp @@ -0,0 +1,185 @@ +/** + * \file src/core/impl/imperative/opr_utility.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include "megbrain/imperative/opr_utility.h" + +// FIXME; setup_config_cn is copied from src/opr/impl/utility.cpp +namespace { +mgb::OperatorNodeConfig setup_config_cn(const mgb::OperatorNodeConfig& config_, + const mgb::CompNode& cn) { + auto prev_cn = config_.get_single_comp_node(); + mgb_assert(!prev_cn.valid() || cn == prev_cn); + auto config = config_; + config.comp_node(cn); + return config; +} +} // namespace +namespace mgb { +namespace opr { + +/* ================ InputCallback ================== */ + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(InputCallback); + +InputCallback::InputCallback(cg::ComputingGraph& graph, callback_t callback, + const VarNodeArray& inputs, + const OperatorNodeConfig& config) + : Super(&graph, config, "input_callback", inputs), + m_callback(callback) { + for (VarNode* i : inputs) { + add_input({i}); + } + DType dt = config.output_dtype(); + mgb_assert(dt.valid()); + add_output(None)->add_flag(VarNode::Flag::NO_SYS_MEM_ALLOC).dtype(dt); + add_output(None) + ->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE) + .add_flag(VarNode::Flag::NO_SYS_MEM_ALLOC) + .dtype(DType::from_enum(DTypeEnum::Byte)); +} + +SymbolVarArray InputCallback::make(cg::ComputingGraph& graph, + callback_t callback, CompNode comp_node, + DType dtype, const SymbolVarArray& inputs) { + mgb_assert(comp_node.valid()); + mgb_assert(dtype.valid()); + OperatorNodeConfig config; + config.comp_node(comp_node); + config.output_dtype(dtype); + auto vinputs = to_var_node_array(inputs); + auto opr = graph.insert_opr( + std::make_unique(graph, callback, vinputs, config)); + return to_symbol_var_array(opr->output()); +} + +void InputCallback::init_output_static_infer_desc() {} + +cg::OperatorNodeBase::NodeProp* InputCallback::do_make_node_prop() const { + NodeProp* prop = Super::do_make_node_prop(); + prop->add_flag(NodeProp::Flag::NO_AUTOMATIC_DUP); + SmallVector dep_types(input().size(), + NodeProp::DepType::DEV_COMP_ORDER); + prop->reset_dep_type(input(), dep_types); + return prop; +} + +void InputCallback::scn_do_execute() { + auto dev_tensor = m_callback(); + output(0)->reset_dev_tensor_from_tensor(dev_tensor); +} + +/* ================ OutputCallback ================== */ + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(OutputCallback); + +OutputCallback::OutputCallback(Param param, const VarNodeArray& inputs, + const OperatorNodeConfig& config) + : Super(inputs[0]->owner_graph(), + setup_config_cn(config, inputs[0]->comp_node()), + "output_callback", inputs), + m_param(std::move(param)) { + for (VarNode* i : inputs) { + add_input({i}); + } + if (!m_param.borrow) { + input(0)->add_flag(VarNode::Flag::NO_SYS_STATIC_MEM_ALLOC); + } + add_output(None) + ->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE) + .add_flag(VarNode::Flag::NO_SYS_MEM_ALLOC) + .dtype(DType::from_enum(DTypeEnum::Byte)); +} + +SymbolVar OutputCallback::make(Param param, const 
SymbolVarArray& inputs) { + mgb_assert(inputs.size() >= 1); + auto vinputs = to_var_node_array(inputs); + OperatorNodeConfig config; + return inputs[0].insert_single_output_opr(std::move(param), + vinputs, config); +} + +void OutputCallback::init_output_static_infer_desc() {} + +cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const { + NodeProp* prop = Super::do_make_node_prop(); + prop->add_flag(NodeProp::Flag::NO_AUTOMATIC_DUP); + SmallVector dep_types(input().size(), + NodeProp::DepType::DEV_COMP_ORDER); + dep_types[0] = NodeProp::DepType::DEV_VALUE; + prop->reset_dep_type(input(), dep_types); + return prop; +} + +void OutputCallback::scn_do_execute() { + m_param.callback(input(0)->dev_tensor()); +} + +/* ================ NopCallback ================== */ + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(NopCallback); + +NopCallback::NopCallback(cg::ComputingGraph& graph, callback_t callback, + const VarNodeArray& inputs, + const OperatorNodeConfig& config) + : Super(&graph, config, "nop_callback", inputs), m_callback(callback) { + for (VarNode* i : inputs) { + add_input({i}); + } + add_output(None) + ->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE) + .add_flag(VarNode::Flag::NO_SYS_MEM_ALLOC) + .dtype(DType::from_enum(DTypeEnum::Byte)); +} + +SymbolVar NopCallback::make(cg::ComputingGraph& graph, callback_t callback, + CompNode comp_node, const SymbolVarArray& inputs) { + mgb_assert(comp_node.valid()); + OperatorNodeConfig config; + config.comp_node(comp_node); + auto vinputs = to_var_node_array(inputs); + auto opr = graph.insert_opr( + std::make_unique(graph, callback, vinputs, config)); + return opr->output(0); +} + +void NopCallback::init_output_static_infer_desc() {} +void NopCallback::on_output_comp_node_stream_changed() {} + +void NopCallback::init_output_comp_node() { + auto cn = config().get_single_comp_node(); + mgb_assert(cn.valid()); + output(0)->comp_node(cn); +} + +cg::OperatorNodeBase::NodeProp* NopCallback::do_make_node_prop() const { + NodeProp* prop = Super::do_make_node_prop(); + SmallVector dep_types(input().size(), + NodeProp::DepType::DEV_COMP_ORDER); + prop->reset_dep_type(input(), dep_types); + prop->add_flag( + cg::OperatorNodeBase::NodeProp::Flag::CROSS_COMP_NODE_MEMORY); + return prop; +} + +void NopCallback::do_execute(ExecEnv& env) { + auto cn = output(0)->comp_node(); + auto runner = [this, cn] { + owner_graph()->event().signal_inplace(this, + cn); + cn.activate(); + m_callback(); + owner_graph()->event().signal_inplace(this, cn); + }; + env.dispatch_on_comp_node(cn, runner); +} + +} // namespace opr +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/ops/backward_graph.cpp b/imperative/src/impl/ops/backward_graph.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d34068d8582fcf872cfea2de791c4cfa357b6ce6 --- /dev/null +++ b/imperative/src/impl/ops/backward_graph.cpp @@ -0,0 +1,113 @@ +/** + * \file src/core/impl/imperative/physical_tensor.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
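+ *
+ * BackwardGraph evaluates a recorded internal graph: apply() walks the
+ * expression list, mapping graph nodes to tensors, while infer_attrs()
+ * performs the same walk symbolically over LogicalTensorDesc.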
+ * + */ + +#include "megbrain/imperative/ops/backward_graph.h" +#include "../op_trait.h" + +namespace mgb { +namespace imperative { + +SmallVector +BackwardGraph::InternalGraph::apply( + const SmallVector& inputs) const { + ThinHashMap node2tensor; + auto&& input_nodes = this->inputs; + mgb_assert(inputs.size() == input_nodes.size()); + for (size_t i = 0; i < inputs.size(); ++ i) { + node2tensor[input_nodes[i]] = inputs[i]; + } + for (auto &&i : constants) { + node2tensor[i.first] = i.second; + } + for (size_t i = 0; i < exprs.size(); ++ i) { + auto&& expr = exprs[i]; + SmallVector inputs; + for (auto &&in : std::get<1>(expr)) { + inputs.push_back(node2tensor.at(in)); + } + auto outputs = OpDef::apply_on_physical_tensor( + *std::get<0>(expr), inputs); + auto output_nodes = std::get<2>(expr); + mgb_assert(outputs.size() == output_nodes.size()); + for (size_t i = 0; i < outputs.size(); ++ i) { + node2tensor[output_nodes[i]] = outputs[i]; + } + } + SmallVector ret; + for (auto &&i : outputs) { + ret.push_back(node2tensor.at(i)); + } + return ret; +} + +SmallVector +BackwardGraph::InternalGraph::infer_attrs( + const SmallVector& inputs) const { + using TensorAttr = LogicalTensorDesc; + ThinHashMap node2attr; + auto&& input_nodes = this->inputs; + mgb_assert(inputs.size() == input_nodes.size()); + for (size_t i = 0; i < inputs.size(); ++ i) { + node2attr[input_nodes[i]] = inputs[i]; + } + for (auto &&i : constants) { + auto* value = i.second->try_get_value(); + mgb_assert(value); + node2attr[i.first] = TensorAttr{ + i.second->layout(), i.second->comp_node(), + value->proxy_to_default_cpu()}; + } + for (size_t i = 0; i < exprs.size(); ++ i) { + auto&& expr = exprs[i]; + SmallVector inputs; + for (auto &&in : std::get<1>(expr)) { + inputs.push_back(node2attr.at(in)); + } + auto outputs = OpDef::infer_output_attrs_fallible( + *std::get<0>(expr), inputs); + auto output_nodes = std::get<2>(expr); + mgb_assert(outputs.size() == output_nodes.size()); + for (size_t i = 0; i < outputs.size(); ++ i) { + node2attr[output_nodes[i]] = outputs[i]; + } + } + SmallVector ret; + for (auto &&i : outputs) { + ret.push_back(node2attr.at(i)); + } + return ret; +} + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(BackwardGraph); + +namespace { +SmallVector backward_impl( + const OpDef& backward_graph, + const SmallVector& tensors) { + return backward_graph.cast_final_safe() + .graph().apply(tensors); +} + +SmallVector infer_tensor_attrs( + const OpDef& backward_graph, + const SmallVector inputs) { + return backward_graph.cast_final_safe() + .graph().infer_attrs(inputs); +} + +OP_TRAIT_REG(BackwardGraph, BackwardGraph) + .apply_on_physical_tensor(backward_impl) + .infer_output_attrs_fallible(infer_tensor_attrs) + .fallback(); +} // anonymous namespace + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/ops/collective_comm.cpp b/imperative/src/impl/ops/collective_comm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..35eca804c4fb3bed37f55c1a5c24f8b5358cab50 --- /dev/null +++ b/imperative/src/impl/ops/collective_comm.cpp @@ -0,0 +1,59 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
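+ *
+ * CollectiveComm only registers apply_on_var_node: it builds a
+ * GroupClientProxy from "addr:port" and inserts the graph-mode
+ * opr::CollectiveComm; the remaining trait entries come from fallback().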
+ * + */ +#include "megbrain_build_config.h" + +#if MGB_ENABLE_OPR_MM +#include "../op_trait.h" +#include "../proxy_graph_detail.h" +#include "megbrain/opr/mm_handler.h" +#endif // MGB_ENABLE_OPR_MM + +#include "megbrain/imperative/ops/collective_comm.h" + +namespace mgb { +namespace imperative { + +#if MGB_ENABLE_OPR_MM +namespace { +cg::OperatorNodeBase* apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + auto&& comm = def.cast_final_safe(); + auto group_client = std::make_shared( + ssprintf("%s:%d", comm.addr.data(), comm.port)); + SmallVector> dev_buffer_arr(1, nullptr); + auto disable = std::make_shared(); + disable->set(0); + + cg::OperatorNodeConfig config; + if (comm.comp_node.size() > 0) { + config.comp_node(CompNode::load(comm.comp_node)); + } + + mgb_assert(inputs.size() == 1, "exactly one input expected"); + auto&& graph = inputs[0]->owner_graph(); + + return graph->insert_opr(std::make_unique( + inputs, graph, comm.key, comm.nr_devices, comm.is_root, comm.rank, + comm.local_grad, group_client, comm.mode, comm.dtype, comm.backend, + dev_buffer_arr, config, disable)); +} + +OP_TRAIT_REG(CollectiveComm, CollectiveComm, opr::CollectiveComm) + .apply_on_var_node(apply_on_var_node) + .fallback(); +} // anonymous namespace +#endif // MGB_ENABLE_OPR_MM + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(CollectiveComm); + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/ops/cond_take.cpp b/imperative/src/impl/ops/cond_take.cpp new file mode 100644 index 0000000000000000000000000000000000000000..de4cd0fbf5146e66aa44481e49714d4cf4535d60 --- /dev/null +++ b/imperative/src/impl/ops/cond_take.cpp @@ -0,0 +1,118 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
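+ *
+ * CondTake has data-dependent output shapes, so apply_on_physical_tensor
+ * below invokes the megdnn kernel directly, using a DynOutMallocPolicy
+ * that allocates each output once its size becomes known.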
+ * + */ + +#include "megbrain/imperative/ops/cond_take.h" +#include "megbrain/imperative/ops/opr_attr.h" +#include "megbrain/opr/misc.h" +#include "../dnn_op_helper.h" +#include "../op_trait.h" + +using namespace megdnn; + +namespace mgb::imperative { + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(CondTake); + +namespace { + +class MegDNNDynOutMallocImpl final: public megdnn::DynOutMallocPolicy { + using Output = std::array; + + CompNode m_cn; + Output m_out; + + public: + MegDNNDynOutMallocImpl(CompNode cn): m_cn{cn} {} + + megdnn::TensorND alloc_output( + size_t id, DType dtype, const TensorShape &shape, + void *user_data) override; + + void* alloc_workspace(size_t sz, void *user_data) override; + void free_workspace(void *ptr, void *user_data) override; + TensorPtr at(size_t id); +}; + +megdnn::TensorND MegDNNDynOutMallocImpl::alloc_output( + size_t id, DType dtype, const TensorShape &shape, + void * /*user_data*/) { + TensorLayout m_layout(shape, dtype); + m_out[id] = Tensor::make(m_layout, m_cn); + return m_out[id]->dev_tensor().as_megdnn(); +} + +void* MegDNNDynOutMallocImpl::alloc_workspace(size_t sz, void * /*user_data*/) { + return m_cn.alloc_device(sz); +} + +void MegDNNDynOutMallocImpl::free_workspace(void *ptr, void * /*user_data*/) { + m_cn.free_device(ptr); +} + +TensorPtr MegDNNDynOutMallocImpl::at(size_t id) { + return m_out[id]; +} + +cg::OperatorNodeBase* apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + def.cast_final_safe(); + auto&& graph = inputs[0]->owner_graph(); + + opr::CondTake::Param param; + param.val = 1; + cg::OperatorNodeConfig config; + cg::OperatorNodeBase* opr = graph->insert_opr( + std::make_unique( + inputs[0], inputs[1], param, config)); + return opr; +} + +SmallVector apply_on_physical_tensor( + const OpDef& def, + const SmallVector& inputs) { + auto opr = def.cast_final_safe(); + mgb_assert(opr.same_type()); + mgb_assert(inputs.size() == 2, "CondTake take 2 inputs, got %lu", + inputs.size()); + + auto&& inp = inputs[0]; + auto&& msk = inputs[1]; + mgb_assert(inp->layout().eq_shape(msk->layout()), + "input shape does not match mask shape"); + mgb_assert(msk->get_value().dtype().enumv() == DTypeEnum::Bool, + "mask dtype must be bool"); + DnnOprCaller dnn_op(inp->comp_node()); + dnn_op.op->param().val = 1; + + TensorLayout m_layout({dnn_op.op->get_workspace_in_bytes(inp->layout())}, + dtype::Byte()); + + auto dnn_workspace = dnn_op.create_workspace(m_layout); + MegDNNDynOutMallocImpl policy{inp->comp_node()}; + + dnn_op.op->exec(inp->dev_tensor().as_megdnn(), + msk->dev_tensor().as_megdnn(), + dnn_workspace, + &policy); + + SmallVector out; + out.push_back(policy.at(0)); + out.push_back(policy.at(1)); + return out; +} + +OP_TRAIT_REG(CondTake, CondTake, opr::CondTake) + .apply_on_var_node(apply_on_var_node) + .apply_on_physical_tensor(apply_on_physical_tensor) + .fallback(); + +} // namespace + +} // namespace mgb::imperative \ No newline at end of file diff --git a/imperative/src/impl/ops/io_remote.cpp b/imperative/src/impl/ops/io_remote.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d7375737f84189b5c48b40dc69cbacf406a0023 --- /dev/null +++ b/imperative/src/impl/ops/io_remote.cpp @@ -0,0 +1,64 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
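+ *
+ * RemoteSend/RemoteRecv are thin wrappers: apply_on_var_node connects a
+ * GroupClientProxy at "addr:port" and inserts the corresponding mm
+ * operator into the owning graph; everything else goes through the
+ * proxy-graph fallback.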
+ * + */ +#include "megbrain_build_config.h" + +#if MGB_ENABLE_OPR_MM +#include "../op_trait.h" +#include "../proxy_graph_detail.h" +#include "megbrain/opr/io_remote.h" +#include "megbrain/opr/mm_handler.h" +#endif // MGB_ENABLE_OPR_MM + +#include "megbrain/imperative/ops/io_remote.h" + +namespace mgb { +namespace imperative { + +#if MGB_ENABLE_OPR_MM +namespace { +cg::OperatorNodeBase* apply_on_var_node_remote_send( + const OpDef& def, const VarNodeArray& inputs) { + auto&& send = def.cast_final_safe(); + auto group_client = std::make_shared( + ssprintf("%s:%d", send.addr.data(), send.port)); + auto&& graph = inputs[0]->owner_graph(); + + cg::OperatorNodeConfig config; + cg::OperatorNodeBase* opr = + graph->insert_opr(std::make_unique( + send.key, inputs[0], group_client, true, config)); + return opr; +} + +cg::OperatorNodeBase* apply_on_var_node_remote_recv( + const OpDef& def, const VarNodeArray& inputs) { + auto&& recv = def.cast_final_safe(); + auto group_client = std::make_shared( + ssprintf("%s:%d", recv.addr.data(), recv.port)); + auto&& graph = inputs[0]->owner_graph(); + return graph->insert_opr(std::make_unique( + recv.key, *graph, group_client, OperatorNodeConfig{recv.cn}, + recv.shape, recv.dtype)); +} + +OP_TRAIT_REG(RemoteSend, RemoteSend, mgb::opr::RemoteSend) + .apply_on_var_node(apply_on_var_node_remote_send) + .fallback(); + +OP_TRAIT_REG(RemoteRecv, RemoteRecv, mgb::opr::RemoteRecv) + .apply_on_var_node(apply_on_var_node_remote_recv) + .fallback(); +} // anonymous namespace +#endif // MGB_ENABLE_OPR_MM + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(RemoteSend); +MGB_DYN_TYPE_OBJ_FINAL_IMPL(RemoteRecv); + +} // namespace imperative +} // namespace mgb diff --git a/imperative/src/impl/ops/nms.cpp b/imperative/src/impl/ops/nms.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3c34c87afaffd58d824c7366826132941a8420bd --- /dev/null +++ b/imperative/src/impl/ops/nms.cpp @@ -0,0 +1,42 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ +#include "../op_trait.h" + +#include "megbrain/imperative/ops/nms.h" +#include "megbrain/opr/standalone/nms_opr.h" + +namespace mgb { +namespace imperative { + +using NMSKeepOpr = opr::standalone::NMSKeep; + +namespace { +cg::OperatorNodeBase* apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + auto&& nms_keep = def.cast_final_safe(); + + NMSKeepOpr::Param param; + param.iou_thresh = nms_keep.iou_thresh; + param.max_output = nms_keep.max_output; + + return NMSKeepOpr::make(inputs[0], param).node()->owner_opr(); +} + +OP_TRAIT_REG(NMSKeep, NMSKeep, NMSKeepOpr) + .apply_on_var_node(apply_on_var_node) + .fallback(); +} // anonymous namespace + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(NMSKeep); + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/ops/opr_attr.cpp b/imperative/src/impl/ops/opr_attr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..feb52c76a51befd27703ad7cf3c0b68f76366c60 --- /dev/null +++ b/imperative/src/impl/ops/opr_attr.cpp @@ -0,0 +1,123 @@ +/** + * \file src/core/impl/imperative/physical_tensor.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
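+ *
+ * OprAttr carries a serialized operator: its registered type name, the
+ * raw param bytes and the node config. The load/dump contexts below
+ * replay the serialization machinery to rebuild a graph operator from
+ * those bytes and back.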
+ * + */ + +#include "megbrain/imperative/ops/opr_attr.h" +#include "megbrain/serialization/opr_load_dump.h" + +#include "../op_trait.h" +#include "../proxy_graph_detail.h" + +namespace mgb { +namespace imperative { + +namespace { +class OprParamsLoadContext final: public serialization::OprLoadContextRawPOD { + const OprAttr::Param& m_param; + size_t m_pos = 0; + ComputingGraph *m_graph; + + void read_raw(void *dest, size_t size) override final { + mgb_assert(m_pos + size <= m_param.size(), "too many bytes requested"); + memcpy(dest, m_param.data() + m_pos, size); + m_pos += size; + } + + std::shared_ptr load_tensor() override { + mgb_assert(0); + } + + std::shared_ptr load_tensor_shared() override { + mgb_assert(0); + } + + const serialization::GraphLoadConfig& config() const override { + mgb_assert(0); + } + + public: + OprParamsLoadContext(const OprAttr::Param& param, + ComputingGraph *graph): + serialization::OprLoadContextRawPOD(false), m_param(param), m_graph(graph) + {} + + ~OprParamsLoadContext() { + mgb_assert(m_pos == m_param.size(), "param not fully consumed"); + } + + ComputingGraph& graph() override { + return *m_graph; + } +}; + +class OprParamsDumpContext final: public serialization::OprDumpContextRawPOD { +public: + OprAttr::Param m_param; + OprParamsDumpContext() : serialization::OprDumpContextRawPOD(false) {} + void write_raw(const void *data, size_t size) { + const char* src = static_cast(data); + m_param.insert(m_param.end(), src, src + size); + } + void dump_tensor( + const std::string &name, + const HostTensorND &tensor, + TensorWriteMethod method) { + mgb_assert(0); + } + const serialization::GraphDumpConfig& config() const { + mgb_assert(0); + } +}; + +cg::OperatorNodeBase* apply_on_var_node( + const OpDef& def, const VarNodeArray& inputs) { + auto&& attr = def.cast_final_safe(); + mgb_assert(!inputs.empty()); + auto registry = serialization::OprRegistry::find_by_name(attr.type); + mgb_assert(registry, "operator %s not found", attr.type.c_str()); + OprParamsLoadContext ctx{attr.param, inputs[0]->owner_graph()}; + return registry->loader(ctx, inputs, attr.config); +} + +std::shared_ptr make_from_op_node(cg::OperatorNodeBase* opr) { + OprParamsDumpContext ctx; + auto registry = serialization::OprRegistry::find_by_type(opr->dyn_typeinfo()); + mgb_assert(registry, "operator %s not found", opr->dyn_typeinfo()->name); + mgb_assert(registry->dumper, "operator %s cannot be serialized", opr->dyn_typeinfo()->name); + registry->dumper(ctx, *opr); + return OprAttr::make(registry->name, std::move(ctx.m_param), opr->config()); +} + +OP_TRAIT_REG(OprAttr, OprAttr) + .make_from_op_node(make_from_op_node) + .apply_on_var_node(apply_on_var_node) + .fallback(); + +} // anonymous namespace + +bool OprAttr::is_same_st(const Hashable& rhs_) const { + auto&& rhs = static_cast(rhs_); + return type == rhs.type && param == rhs.param + && config.comp_node() == rhs.config.comp_node() + && config.output_dtype() == rhs.config.output_dtype(); +} + +size_t OprAttr::hash() const { + return hash_pair_combine( + hash_pair_combine( + mgb::hash(type), + mgb::hash(static_cast>(param))), + config.hash()); +} + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(OprAttr); + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/ops/tensor_manip.cpp b/imperative/src/impl/ops/tensor_manip.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e8a84e6911ac4fe1a8221768bc6b4e60f14c9e2e --- /dev/null +++ 
b/imperative/src/impl/ops/tensor_manip.cpp @@ -0,0 +1,145 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include "megbrain/imperative/ops/tensor_manip.h" +#include "megbrain/imperative/ops/opr_attr.h" +#include "megbrain/opr/tensor_manip.h" +#include "../op_trait.h" + +namespace mgb::imperative { +namespace { + +cg::OperatorNodeBase* apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + def.cast_final_safe(); + return opr::GetVarShape::make(inputs).node()->owner_opr(); +} + +SmallVector apply_on_physical_tensor( + const OpDef& def, + const SmallVector& inputs) { + def.cast_final_safe(); + mgb_assert(inputs.size() == 1, "GetVarShape take 1 input, got %lu", inputs.size()); + auto&& inp = inputs[0]; + auto&& shp = inp->layout(); + mgb_assert(shp.ndim != 0, "input shape invalid"); + HostTensorND hv(inp->comp_node(), {shp.ndim}, dtype::Int32()); + auto* ptr = hv.ptr(); + for (size_t i = 0; i < shp.ndim; ++i) { + ptr[i] = shp.shape[i]; + } + return {Tensor::make(std::move(hv))}; +} + +SmallVector infer_output_attrs_fallible( + const OpDef& def, + const SmallVector& inputs) { + def.cast_final_safe(); + mgb_assert(inputs.size() == 1, "GetVarShape take 1 input, got %lu", inputs.size()); + auto&& desc = inputs[0]; + if (!desc.layout.ndim) { + return {{TensorLayout(dtype::Int32()), desc.comp_node}}; + } + DeviceTensorND value(CompNode::default_cpu(), {desc.layout.ndim}, dtype::Int32()); + auto* ptr = value.ptr(); + for (size_t i = 0; i < desc.layout.ndim; ++i) { + ptr[i] = desc.layout[i]; + } + return {{value.layout(), desc.comp_node, std::move(value)}}; +} + +std::shared_ptr make_from_op_node(cg::OperatorNodeBase* node_) { + auto* node = &node_->cast_final_safe(); + if (node->config().comp_node().size() || + node->config().output_dtype().valid() || + node->param().axis != opr::GetVarShape::Param::INVALID_AXIS) { + mgb_log_warn("weird GetVarShape"); + return OpTrait::find_by_typeinfo(OprAttr::typeinfo())->make_from_op_node(node); + } + return GetVarShape::make(); +} + +OP_TRAIT_REG(GetVarShape, GetVarShape, opr::GetVarShape) + .make_from_op_node(make_from_op_node) + .infer_output_attrs_fallible(infer_output_attrs_fallible) + .apply_on_var_node(apply_on_var_node) + .apply_on_physical_tensor(apply_on_physical_tensor) + .fallback(); + +TensorShapeArray get_shapes(const std::vector>& shapes) { + TensorShapeArray ret; + for (auto&& i:shapes) { + SmallVector shape(i.begin(), i.end()); + TensorShape shp(shape); + ret.push_back(shp); + } + return ret; +} + +cg::OperatorNodeBase* param_pack_split_apply_on_var_node( + const OpDef& def, const VarNodeArray& inputs) { + auto&& param = def.cast_final_safe(); + auto&& graph = inputs[0]->owner_graph(); + + auto&& shapes = get_shapes(param.shapes); + cg::OperatorNodeConfig config; + cg::OperatorNodeBase* opr = + graph->insert_opr(std::make_unique( + inputs[0], param.offsets, shapes, config)); + return opr; +} + +SmallVector param_pack_split_apply_on_physical_tensor( + const OpDef& def, + const SmallVector& inputs) { + auto param = def.cast_final_safe(); + mgb_assert(inputs.size() == 1, "ParamPackSplit take 1 input, got %lu", inputs.size()); + auto&& inp = inputs[0]; + auto&& shp = inp->layout(); + mgb_assert(shp.ndim == 1, "ParamPackSplit input shape invalid, ndim should be 1"); + mgb_assert(param.shapes.size() * 2 == param.offsets.size()); + SmallVector ret; + 
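+    // offsets are stored as (begin, end) element pairs, one pair per
+    // shape (hence the size() * 2 assertion above); each output is a
+    // zero-copy sub-view of the flat input blob starting at byte offset
+    // begin * dtype_size.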
auto&& shapes = get_shapes(param.shapes); + size_t dtype_size = inputs[0]->layout().dtype.size(); + for (size_t i = 0; i < shapes.size(); ++i) { + ret.push_back( + inputs[0]->sub(param.offsets[i * 2] * dtype_size, shapes[i])); + } + return ret; +} + +OP_TRAIT_REG(ParamPackSplit, ParamPackSplit, mgb::opr::ParamPackSplit) + .apply_on_var_node(param_pack_split_apply_on_var_node) + .apply_on_physical_tensor(param_pack_split_apply_on_physical_tensor) + .fallback(); + +cg::OperatorNodeBase* param_pack_concat_apply_on_var_node( + const OpDef& def, const VarNodeArray& inputs) { + auto&& param = def.cast_final_safe(); + auto&& graph = inputs[0]->owner_graph(); + + VarNodeArray inps(inputs.begin(), inputs.end() - 1); + cg::OperatorNodeConfig config; + cg::OperatorNodeBase* opr = + graph->insert_opr(std::make_unique( + inps, inputs.back(), param.offsets, config)); + return opr; +} + +OP_TRAIT_REG(ParamPackConcat, ParamPackConcat, mgb::opr::ParamPackConcat) + .apply_on_var_node(param_pack_concat_apply_on_var_node) + .fallback(); +} // namespace + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(GetVarShape); +MGB_DYN_TYPE_OBJ_FINAL_IMPL(ParamPackSplit); +MGB_DYN_TYPE_OBJ_FINAL_IMPL(ParamPackConcat); + +} // namespace mgb::imperative diff --git a/imperative/src/impl/physical_tensor.cpp b/imperative/src/impl/physical_tensor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..268b6d24cf5d311e08dd489d17004eaa605bb00a --- /dev/null +++ b/imperative/src/impl/physical_tensor.cpp @@ -0,0 +1,425 @@ +/** + * \file src/core/impl/imperative/physical_tensor.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include "megbrain/imperative.h" +#include "megbrain/imperative/blob_manager.h" +#include + +namespace mgb { +namespace imperative { + +namespace { + +class EventPool : CompNodeDepedentObject { + CompNode::UnorderedMap m_cn2pool; + Spinlock m_lock; + + EventPool() = default; +public: + static EventPool& inst() { + static Spinlock lock; + static std::unique_ptr ptr; + MGB_LOCK_GUARD(lock); + if (!ptr || ptr->is_finalized()) { + ptr.reset(new EventPool()); + } + return *ptr; + } + CompNode::Event* alloc(CompNode cn) { + CompNode::EventPool *pool; + { + MGB_LOCK_GUARD(m_lock); + auto iter = m_cn2pool.find(cn); + if (iter == m_cn2pool.end()) { + iter = m_cn2pool.emplace( + std::piecewise_construct, + std::forward_as_tuple(cn), + std::forward_as_tuple(cn)).first; + } + pool = &iter->second; + } + return pool->alloc(); + } + void free(CompNode::Event* event) { + CompNode::EventPool* pool; + { + MGB_LOCK_GUARD(m_lock); + pool = &m_cn2pool.at(event->comp_node()); + } + pool->free(event); + } + std::shared_ptr on_comp_node_finalize() override { + MGB_LOCK_GUARD(m_lock); + for (auto&& i : m_cn2pool) { + i.second.assert_all_freed(); + } + return {}; + } + ~EventPool() { + for (auto&& i : m_cn2pool) { + i.second.assert_all_freed(); + } + } +}; + +class AsyncReleaser : public CompNodeDepedentObject { + struct WaiterParam { + CompNode cn; + CompNode::Event *event; + BlobPtr blob; + HostTensorStorage::RawStorage storage; + }; + class Waiter final: public AsyncQueueSC { + AsyncReleaser *m_par_releaser; + + public: + Waiter(AsyncReleaser *releaser): + m_par_releaser(releaser) + { + } + + void process_one_task(WaiterParam ¶m) { + if (param.event->finished()) { + param.blob.reset(); + param.storage.reset(); + EventPool::inst().free(param.event); + return; + } + + using namespace std::literals; + 
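+            // the event has not fired yet: sleep briefly and re-enqueue
+            // the task, polling until the comp node reaches the recorded
+            // event and the blob/storage can be released safely.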
std::this_thread::sleep_for(1us); + add_task(std::move(param)); + } + }; + Waiter m_waiter{this}; + +protected: + std::shared_ptr on_comp_node_finalize() override { + m_waiter.wait_task_queue_empty(); + return {}; + } + +public: + static AsyncReleaser* inst() { + static AsyncReleaser releaser; + return &releaser; + } + + ~AsyncReleaser() { + m_waiter.wait_task_queue_empty(); + } + + void add(BlobPtr blob, CompNode cn) { + add(cn, std::move(blob), {}); + } + + void add(const HostTensorND& hv) { + add(hv.comp_node(), {}, hv.storage().raw_storage()); + } + + void add(CompNode cn, BlobPtr blob, HostTensorStorage::RawStorage storage = {}) { + auto event = EventPool::inst().alloc(cn); + event->record(); + m_waiter.add_task({cn, event, std::move(blob), std::move(storage)}); + } +}; + +class CompNodeSyncManager : public CompNodeDepedentObject { + ThinHashMap> m_blob2event; + std::mutex m_mtx; +private: + static CompNodeSyncManager mgr; +public: + std::shared_ptr on_comp_node_finalize() override { + MGB_LOCK_GUARD(m_mtx); + m_blob2event.clear(); + return {}; + } + + static CompNodeSyncManager* inst() { + return &mgr; + } + + CompNode::Event* get_or_create_event(Blob* blob) { + mgb_assert(!is_finalized()); + MGB_LOCK_GUARD(m_mtx); + auto&& e = m_blob2event[blob]; + if (!e) { + e = blob->comp_node().create_event(); + } + return e.get(); + } + + void remove(Blob* blob) { + MGB_LOCK_GUARD(m_mtx); + m_blob2event.erase(blob); + } +}; +CompNodeSyncManager CompNodeSyncManager::mgr; + +// Cache for small blobs +// 1. A blob has to be seen twice (within a window) to be eligible for cache +// 2. Cache eviction occurs when cache size reaches a threshold, in least frequently used order +class ConstTensorCache { +public: + struct Entry { + size_t hitcnt = 0; + std::unique_ptr data; + size_t size; + BlobPtr blob; + + Entry(const dt_byte* ptr, size_t size_, BlobPtr blob_) + : data(new dt_byte[size_]), size(size_), blob(blob_) { + memcpy(data.get(), ptr, size); + } + + // does not check input + bool match(const HostTensorND& hv) { + return 0 == memcmp(data.get(), hv.raw_ptr(), hv.layout().span().high_byte); + } + }; + + bool check(const HostTensorND& hv) { + auto&& layout = hv.layout(); + auto&& span = layout.span(); + return hv.format().is_default() && !hv.empty() && + layout.is_contiguous() && span.low_byte == 0 && + span.high_byte <= max_bytes; + } + + // hash storage; does not check input + static uint64_t hash(const HostTensorND& hv) { + auto&& span = hv.layout().span(); + return XXHash{} + .update(hv.raw_ptr(), span.high_byte) + .digest(); + } + + BlobPtr lookup(const HostTensorND& hv) { + if (!check(hv)) { + return {}; + } + auto h = hash(hv); + MGB_LOCK_GUARD(mtx); + // lookup in g1 + auto it = g1.find(h); + if (it != g1.end()) { + if (!it->second.match(hv)) { + mgb_log_warn("hash collision in const tensor cache"); + return {}; + } + it->second.hitcnt += 1; + return it->second.blob; + } + // lookup in g0 + if (!g0.extract(h) && !g0b.extract(h)) { + maybe_collect_g0(); + g0.emplace(h); + return {}; + } + // add new entry to g1 + maybe_collect_g1(); + Entry entry(hv.raw_ptr(), hv.layout().span().high_byte, Tensor(hv).blob()); + it = g1.emplace_hint(it, h, std::move(entry)); + it->second.hitcnt += 1; + return it->second.blob; + } + + void clear() { + MGB_LOCK_GUARD(mtx); + g0.clear(); + g0b.clear(); + g1.clear(); + } + + std::mutex mtx; + size_t hwm = 1024, lwm = 512, max_bytes = TensorShape::MAX_NDIM * 8, window = 65536; + +private: + void maybe_collect_g0() { + if (g0.size() > window) { + std::swap(g0, g0b); 
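+            // g0 and g0b are two hash generations: swapping then clearing
+            // retires hashes older than ~2 windows, so a blob must be seen
+            // twice within a window to be promoted into g1.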
+ g0.clear(); + } + } + void maybe_collect_g1() { + if (g1.size() <= hwm) return; + + using KV = std::pair; + std::vector tmp; + tmp.reserve(g1.size()); + for (auto&& kv : g1) { + tmp.emplace_back(kv.first, std::move(kv.second)); + } + std::nth_element(tmp.begin(), tmp.begin() + lwm, tmp.end(), [](const KV& lhs, const KV& rhs) { + return lhs.second.hitcnt > rhs.second.hitcnt; + }); + g1.clear(); + for (auto&& kv : tmp) { + kv.second.hitcnt = 0; + g1.emplace(std::move(kv)); + } + } + std::unordered_set g0, g0b; + std::unordered_map g1; +}; + +struct MultiCNConstTensorCache : CompNodeDepedentObject { + std::mutex mtx; + CompNode::UnorderedMap cn2cache; + + std::shared_ptr on_comp_node_finalize() { + MGB_LOCK_GUARD(mtx); + cn2cache.clear(); + return {}; + } + + BlobPtr lookup(const HostTensorND& hv) { + MGB_LOCK_GUARD(mtx); + return cn2cache[hv.comp_node()].lookup(hv); + } +}; + +MultiCNConstTensorCache const_tensor_cache; + +} // namespace + +void EventDeleter::operator()(CompNode::Event* event) { + EventPool::inst().free(event); +} + +Blob::Blob(const DeviceTensorStorage& s): + m_comp_node{s.comp_node()}, m_storage{s.raw_storage()}, + m_size{s.size()} { + BlobManager::inst()->register_blob(this); +} + +Blob::Blob(CompNode cn, size_t sz): + m_comp_node{cn}, m_storage{}, m_size{sz} { + BlobManager::inst()->register_blob(this); +} + +Blob::~Blob() { + BlobManager::inst()->unregister_blob(this); + CompNodeSyncManager::inst()->remove(this); +} + +const Blob::RawStorage& Blob::storage() { + if (!m_storage) { + BlobManager::inst()->alloc_with_defrag(this, m_size); + } + return m_storage; +} + +Tensor::Tensor(BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv) + : m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) { +} + +Tensor::Tensor(const HostTensorND &hv) + : Tensor(hv.layout(), hv.comp_node()) { + m_value = hv; + dev_tensor().copy_from_fixlayout(hv); + // even though hv is saved in m_value, Tensor itself could be + // released before copy completes + AsyncReleaser::inst()->add(hv); +} + +Tensor::Tensor(const DeviceTensorND &dv, const HostTensorND& hv) { + if (!hv.empty()) { + mgb_assert(dv.comp_node() == hv.comp_node()); + mgb_assert(dv.dtype() == hv.dtype()); + mgb_assert(dv.shape().eq_shape(hv.shape())); + m_value = hv; + } + m_layout = dv.layout(); + m_blob = Blob::make(dv.storage()); + m_offset = 0; +} + +Tensor::Tensor(const TensorLayout& layout, const CompNode& cn) + : m_layout{layout}, m_blob{Blob::make(cn, layout.dtype.size(layout.total_nr_elems()))}, + m_offset{0} {} + +Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout) + : m_layout{layout}, m_blob{blob}, m_offset{offset} {} + +TensorPtr Tensor::make(const HostTensorND& hv) { + auto&& blob = const_tensor_cache.lookup(hv); + if (blob) { + return make(std::forward(blob), hv.layout(), hv); + } + return std::make_shared(hv); +} + +DeviceTensorND Tensor::dev_tensor() { + mgb_assert(m_blob, "uninitialized tensor."); + DeviceTensorStorage storage; + storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage()); + storage = storage.sub(m_offset); + DeviceTensorND ret; + ret.reset(storage, m_layout); + return ret; +} + +void Tensor::fetch_value() { + MGB_LOCK_GUARD(m_mtx); + if (m_value.empty()) { + m_value.copy_from(dev_tensor()); + m_value_ready.reset(EventPool::inst().alloc(comp_node())); + m_value_ready->record(); + } +} + +bool Tensor::value_fetched() { + MGB_LOCK_GUARD(m_mtx); + return m_value.layout().ndim != 0; +} + +const HostTensorND& 
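+// blocking accessor: fetch_value() starts the device-to-host copy if
+// needed, then host_wait() blocks until the ready event fires (see the
+// non-blocking try_get_value() below).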
Tensor::get_value() { + fetch_value(); + if (m_value_ready) { + m_value_ready->host_wait(); + } + return m_value; +} + +const HostTensorND* Tensor::try_get_value() { + MGB_LOCK_GUARD(m_mtx); + if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) { + return &m_value; + } + return nullptr; +} + +TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) { + HostTensorND hv{cn, value.dtype()}; + hv.resize({1}); + memcpy(hv.raw_ptr(), value.storage(), value.dtype().size(1)); + return make(hv); +} + +TensorPtr Tensor::sub(size_t offset, TensorShape shape) { + TensorLayout layout(shape, m_layout.dtype); + return Tensor::make(m_blob, offset + m_offset, layout); +} + +void Tensor::add_release_callback(CompNode cn) { + AsyncReleaser::inst()->add(m_blob, cn); +} + +CompNode::Event* Tensor::get_or_create_event() { + auto e = CompNodeSyncManager::inst()->get_or_create_event(m_blob.get()); + e->record(); + return e; +} + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/profiler.cpp b/imperative/src/impl/profiler.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f35f5b3c9232c276ac3b0db93308e66ea5833ba0 --- /dev/null +++ b/imperative/src/impl/profiler.cpp @@ -0,0 +1,214 @@ +/** + * \file src/core/impl/imperative/profiler.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include "megbrain/imperative/profiler.h" + +#include +#include + +#include "megbrain/imperative/ops/opr_attr.h" +#include "megbrain/imperative/physical_tensor.h" + +#include "./op_trait.h" + +namespace mgb { + +namespace imperative { + +class OpDefInfo{ +public: + size_t id; + std::string name; +}; + +class ProfilerEntry { +public: + ProfilerEntry(size_t index, Profiler::EventKind type, std::unique_ptr device) + : index{index}, type{type}, device{std::move(device)}{ + } + ProfilerEntry(size_t index, Profiler::EventKind type, double host): index{index}, type{type}, host{host}{ + } + size_t index; + Profiler::EventKind type; + std::unique_ptr device = nullptr; + double host = 0; +}; + +class ProfilerPrivate { +public: + std::vector op_list; + std::vector entry_list; + std::vector> event_list; + std::vector>> + hook_list; + ThinHashMap> + comp_node_begin_map; + ThinHashMap comp_node_end_map; + RealTimer timer; + size_t dump_count = 0; + bool enabled = false; + std::string path; +}; + +namespace { +CompNode::UnorderedSet collect_comp_nodes( + const OpDef& def, const SmallVector& inputs) { + CompNode::UnorderedSet comp_nodes; + for (auto&& input : inputs) { + comp_nodes.insert(input->comp_node()); + } + for (auto&& output_attr : def.infer_output_attrs(def, inputs)) { + comp_nodes.insert(output_attr.comp_node); + } + return comp_nodes; +} +} // namespace + +std::unique_ptr Profiler::create_event(CompNode comp_node){ + auto event = comp_node.create_event(CompNode::Event::NEED_TIMER); + event->record(); + auto& [begin, time] = m_private->comp_node_begin_map[comp_node]; + if (begin == nullptr) { + begin = event.get(); + time = m_private->timer.get_msecs(); + } + return event; +} + +double Profiler::get_host_time_now(){ + return m_private->timer.get_msecs(); +} + +double Profiler::get_device_time(CompNode::Event& event) { + auto [base_event, host_time] = + m_private->comp_node_begin_map[event.comp_node()]; + if (base_event == &event) { + return host_time; + } else { + return 
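+        // elapsed_time_until() is in seconds; scale to milliseconds and
+        // offset by the host timestamp recorded with the base event so
+        // device times align with RealTimer::get_msecs() values.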
host_time + base_event->elapsed_time_until(event) * 1000; + } +} + +size_t Profiler::get_dump_count(){ + return m_private->dump_count; +} + +Profiler::Profiler() { + m_private = std::make_unique(); +} + +Profiler::Profiler(const std::string& path): Profiler() { + m_private->path = path; +} + +void Profiler::enable() { + m_private->enabled = true; + CompNode::sync_all(); + OpTrait::for_each_trait([this](OpTrait& trait) { + auto backup = std::make_unique( + std::move(trait.apply_on_physical_tensor)); + trait.apply_on_physical_tensor = + [this, backup = backup.get()] ( + const OpDef& def, + const SmallVector& inputs){ + size_t index = m_private->op_list.size(); + std::string name = "[" + std::to_string(index) + "]" + print_op(def); + m_private->op_list.push_back({reinterpret_cast(&def), name}); + m_private->entry_list.emplace_back(index, OprBegin, get_host_time_now()); + auto&& comp_nodes = collect_comp_nodes(def, inputs); + for (auto&& comp_node : comp_nodes) { + m_private->entry_list.emplace_back(index, OprBegin, create_event(comp_node)); + } + auto output = (*backup)(def, inputs); + for (auto&& comp_node : comp_nodes) { + m_private->entry_list.emplace_back(index, OprEnd, create_event(comp_node)); + } + m_private->entry_list.emplace_back(index, OprEnd, get_host_time_now()); + return output; + }; + m_private->hook_list.push_back({&trait, std::move(backup)}); + }); +} + +void Profiler::disable() { + for (auto&& hook : m_private->hook_list) { + std::get<0>(hook)->apply_on_physical_tensor = + std::move(*std::get<1>(hook)); + } + m_private->hook_list.clear(); + m_private->enabled = false; +} + +Profiler::~Profiler() { +} + +void Profiler::dump(){ + dump(m_private->path); +} + +void Profiler::dump(const std::string& path) { + using namespace json; + auto obj = json::Object::make(); + if (!(*obj)["traceEvents"]) { + (*obj)["traceEvents"] = Array::make(); + } + auto& trace_events = (*obj)["traceEvents"]->cast_final(); + for (auto&& entry : m_private->entry_list) { + auto trace_event_ptr = Object::make(); + auto& trace_event = *trace_event_ptr; + std::string name; + size_t id; + int pid; + std::string tid; + double ts; + const char* ph; + name = m_private->op_list[entry.index].name; + id = entry.index; + pid = getpid(); + if (entry.device) { + entry.device->host_wait(); + ts = get_device_time(*entry.device); + tid = entry.device->comp_node().to_string(); + } else { + ts = entry.host; + tid = "host"; + } + switch (entry.type) { + case OprBegin: { + ph = "B"; + break; + } + case OprEnd: { + ph = "E"; + break; + } + } + trace_event["name"] = String::make(name); + trace_event["id"] = Number::make(id); + trace_event["pid"] = Number::make(pid); + trace_event["tid"] = String::make(tid); + trace_event["ts"] = Number::make(ts * 1000); + trace_event["ph"] = String::make(ph); + trace_events.add(std::move(trace_event_ptr)); + } + obj->writeto_fpath(path.empty() ? 
m_private->path : path);
+    m_private->dump_count++;
+}
+
+std::string Profiler::print_op(const OpDef& def) {
+    auto* opr_attr = def.try_cast_final<OprAttr>();
+    if (opr_attr) {
+        return std::string("OprAttr:") + opr_attr->type;
+    }
+    return def.dyn_typeinfo()->name;
+}
+
+} // namespace imperative
+
+} // namespace mgb
diff --git a/imperative/src/impl/proxy_graph.cpp b/imperative/src/impl/proxy_graph.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b750749ff921e5293b838a0b31b0021f53892bd4
--- /dev/null
+++ b/imperative/src/impl/proxy_graph.cpp
@@ -0,0 +1,850 @@
+/**
+ * \file imperative/src/impl/proxy_graph.cpp
+ *
+ * This file is part of MegBrain, a deep learning framework developed by Megvii.
+ *
+ * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ *
+ */
+
+#include "./blob_manager_impl.h"
+#include "./proxy_graph.h"
+#include "megbrain/graph/static_infer.h"
+#include "megbrain/graph/operator_node.h"
+#include "megbrain/opr/io.h"
+#include "megbrain/opr/utility.h"
+#include "megbrain/imperative/ops/opr_attr.h"
+#include "megbrain/imperative/ops/backward_graph.h"
+
+namespace mgb {
+namespace imperative {
+
+using cg::OperatorNodeBase;
+
+template<bool p, typename T, typename F>
+constexpr auto&& select(T&& t, F&& f) {
+    if constexpr (p) {
+        return std::forward<T>(t);
+    } else {
+        return std::forward<F>(f);
+    }
+}
+
+MGB_DEFINE_OPR_CLASS(
+        ProxyGraph::InputPlaceholder,
+        cg::OperatorNodeBase) // {
+
+    void on_output_comp_node_stream_changed() override {
+        mgb_assert(0);
+    }
+    // TODO: consider implementing the following initialization methods,
+    // so InputPlaceholder can be initialized correctly during
+    // operator insertion
+    void init_output_comp_node() override {
+    }
+    void init_output_format() override {
+    }
+    void init_output_dtype() override {
+    }
+    void init_output_static_infer_desc() override {
+    }
+    void init_output_mem_plan(bool dynamic) override {
+        MGB_MARK_USED_VAR(dynamic);
+        mgb_assert(0);
+    }
+    void do_execute(ExecEnv &env) override {
+        mgb_assert(0);
+    }
+
+public:
+    Tensor* m_tensor;
+
+    InputPlaceholder(ComputingGraph& graph, Tensor* tensor = nullptr,
+                     const DeviceTensorND& static_infer_value = {})
+            : Super(&graph, {}, "device_value", {}), m_tensor(tensor),
+              m_static_infer_value(static_infer_value) {
+        mgb_assert(m_static_infer_value.empty() ||
+                   m_static_infer_value.comp_node() == CompNode::default_cpu());
+        add_output(None)->add_flag(VarNode::Flag::NO_SYS_MEM_ALLOC);
+        // never dedup
+        add_equivalence_component<ScalarHash<void*>>(this);
+    }
+
+    static SymbolVar make(ComputingGraph& graph, Tensor& tensor) {
+        auto opr = graph.insert_opr(
+                std::make_unique<InputPlaceholder>(graph, &tensor));
+        auto var = opr->output(0);
+        auto&& dev_tensor = tensor.dev_tensor();
+        var->m_comp_node = dev_tensor.comp_node();
+        var->m_shape = dev_tensor.shape();
+        var->m_dev_tensor = dev_tensor;
+        var->reset_dev_tensor_from_tensor(dev_tensor);
+        return var;
+    }
+
+    static SymbolVar make(ComputingGraph& graph, const LogicalTensorDesc& desc) {
+        auto opr = graph.insert_opr(
+                std::make_unique<InputPlaceholder>(graph, nullptr, desc.value));
+        auto var = opr->output(0);
+        var->m_comp_node = desc.comp_node;
+        var->m_shape = desc.layout;
+        var->m_dev_tensor.reset({}, TensorLayout(desc.layout.dtype));
+        return var;
+    }
+
+    const DeviceTensorND* get_static_infer_value(bool may_sync) {
+        if (!m_static_infer_value.empty()) {
+            return &m_static_infer_value;
+        }
+        if (m_tensor && (may_sync || m_tensor->try_get_value())) {
+            auto&& hv = m_tensor->get_value();
+            mgb_assert(!hv.empty());
+            m_static_infer_value = hv.proxy_to_default_cpu();
+            // steal 
ownership from shared_ptr + using SP = std::shared_ptr; + auto& sp = const_cast(m_static_infer_value.storage().raw_storage()); + static auto dummy = std::make_shared(); + sp = SP(dummy, sp.get()); + return &m_static_infer_value; + } + return nullptr; + } + +private: + DeviceTensorND m_static_infer_value; +}; +MGB_DYN_TYPE_OBJ_FINAL_IMPL( + ProxyGraph::InputPlaceholder); + +class ProxyGraph::ExecEnv final : public cg::GraphExecutable::ExecEnv { + +public: + void dispatch_on_comp_node(CompNode, Task&& task) override { + task(); + } + + void dispatch_on_comp_node_with_mask(CompNode, Task&& task, + cg::ExecutionMask* mask) override { + mgb_throw_if(mask, GraphError, + "ExecutionMask not supported in imperative mode"); + task(); + } + + void pause_exec() override {} + + void resume_exec() override {} +}; + +class ProxyGraph::StaticInferManager : public cg::static_infer::StaticInferManager { +public: + using Tag = cg::static_infer::Tag; + using ShapeInferDesc = cg::static_infer::ShapeInferDesc; + using ValueInferDesc = cg::static_infer::ValueInferDesc; + using InferType = cg::static_infer::InferType; + using DepVal = cg::static_infer::DepVal; + using DepElement = cg::static_infer::DepElement; + using DepType = cg::static_infer::DepType; + using InpElement = cg::static_infer::InpElement; + + struct Result { + TensorShape shape; + DeviceTensorND value; + }; + + ProxyGraph* owner; + cg::OperatorNodeBase* cur_opr = nullptr; + std::vector> shape_descs; + std::vector> value_descs; + std::vector inferred_outputs; + + StaticInferManager(ProxyGraph* owner_) : owner(owner_) {} + + size_t locate_output(VarNode* var) { + mgb_assert(cur_opr); + auto&& output_vars = cur_opr->output(); + mgb_assert(shape_descs.size() == output_vars.size()); + auto&& it = std::find(output_vars.begin(), output_vars.end(), var); + mgb_assert(it != output_vars.end()); + return it - output_vars.begin(); + } + + void register_shape_infer(Tag dest, const ShapeInferDesc &desc) override { + auto i = locate_output(dest); + mgb_assert(!shape_descs[i]); + shape_descs[i].emplace(desc); + } + + void register_value_infer(Tag dest, const ValueInferDesc &desc) override { + auto i = locate_output(dest); + mgb_assert(!value_descs[i]); + value_descs[i].emplace(desc); + } + + InferType get_infer_type(Tag var) override { + // may be called during get_proxy_opr or make_backward_graph + + // don't let opr apply any immediate optimization + return {InferType::MISSING_INP, InferType::MISSING_INP}; + + if (auto opr = var->owner_opr()->try_cast_final()) { + return {var->shape().ndim ? InferType::CONST : InferType::MISSING_INP, + opr->m_tensor ? InferType::CONST : InferType::MISSING_INP}; + } + if (cur_opr) { + auto&& outputs = cur_opr->output(); + auto&& it = std::find(outputs.begin(), outputs.end(), var); + if (it != outputs.end()) { + return {infer_shape_fallible(var) ? 
InferType::CONST : InferType::MISSING_INP, + // value inference could be expensive + InferType::MISSING_INP}; + } + } + return {InferType::MISSING_INP, InferType::MISSING_INP}; + } + + void update() { + if (cur_opr != owner->m_cur_opr) { + clear(); + cur_opr = owner->m_cur_opr; + if (cur_opr) { + auto nout = cur_opr->output().size(); + shape_descs.resize(nout); + value_descs.resize(nout); + inferred_outputs.resize(nout); + cur_opr->init_output_static_infer_desc(); + } + } + } + + void clear() { + cur_opr = nullptr; + shape_descs.clear(); + value_descs.clear(); + inferred_outputs.clear(); + } + + template + auto do_infer(Tag dest, bool may_sync) + -> const std::conditional_t* { + // Some infer_func does not use InpVal passed to them, but + // call infer_* on their inputs instead, so dest could be an input. + // It is also possible that an opr call infer_* on its inputs before it + // is inserted + if (auto opr = dest->owner_opr()->try_cast_final()) { + if constexpr (is_shape) { + auto* shp = &dest->shape(); + return shp->ndim ? shp : nullptr; + } else { + return opr->get_static_infer_value(may_sync); + } + } + + mgb_assert(cur_opr); + mgb_assert(cur_opr->output().size() == shape_descs.size()); + + // dest must be an output now + auto i = locate_output(dest); + auto& result = inferred_outputs[i]; + auto& desc = select(shape_descs[i], value_descs[i]); + + // return if no need to call infer_func + if constexpr (is_shape) { + if (result.shape.ndim != 0) { + return &result.shape; + } + } else { + if (!result.value.empty()) { + return &result.value; + } + } + if (!desc) { + return nullptr; + } + + // fill args for infer_func + cg::static_infer::InpVal args{1}; + args.val.reserve(desc->deps.size()); + auto push_shape = [&args](const TensorShape* shape) { + args.val.emplace_back(); + args.val.back().m_shape = shape; + }; + auto push_value = [&args](const DeviceTensorND* value) { + args.val.emplace_back(); + args.val.back().m_value = value; + }; + + for (auto&& dep : desc->deps) { + if (auto opr = dep.dest->owner_opr()->template try_cast_final()) { + if (dep.type == DepType::SHAPE) { + if (dep.dest->shape().ndim) { + push_shape(&dep.dest->shape()); + } else { + return nullptr; + } + } else { + if (auto* p = opr->get_static_infer_value(may_sync)) { + push_value(p); + } else { + return nullptr; + } + } + continue; + } + + // dep must be an output + if (dep.type == DepType::SHAPE) { + if (auto* p = do_infer(dep.dest, may_sync)) { + push_shape(p); + } else { + return nullptr; + } + } else { + if (auto* p = do_infer(dep.dest, may_sync)) { + push_value(p); + } else { + return nullptr; + } + } + } + + // call infer_func + if constexpr (is_shape) { + if (!desc->infer_func(result.shape, args)) { + mgb_log_warn("something is missing for shape inference of %s", + cur_opr->dyn_typeinfo()->name); + return nullptr; + } + return &result.shape; + } else { + if (!desc->infer_func(result.value, args)) { + mgb_log_warn("something is missing for value inference of %s", + cur_opr->dyn_typeinfo()->name); + return nullptr; + } + return &result.value; + } + } + + const TensorShape& infer_shape(Tag var) override { + auto* p = do_infer(var, true); + mgb_assert(p, "failed to infer shape for %s", var->name().c_str()); + return *p; + } + const TensorShape* infer_shape_fallible(Tag var) override { + return do_infer(var, false); + } + const DeviceTensorND& infer_value(Tag var) override { + auto* p = do_infer(var, true); + mgb_assert(p, "failed to infer value for %s", var->name().c_str()); + return *p; + } + const DeviceTensorND* 
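+    // the *_fallible variants run do_infer with may_sync=false: they never
+    // block on a device-to-host copy and return nullptr instead.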
+class ProxyGraph::SeqCompNodeOptimizer : public cg::SeqCompNodeOptimizer { + void register_stream_var(VarNode*, StreamPropType) override {} + void register_propagate_function(VarNode*, PropFunction) override {} + StreamPropType stream_prop_type(VarNode*) override {mgb_assert(0);} +}; + +class ProxyGraph::ProxyGraphImpl : public cg::ComputingGraph { + static std::atomic<size_t> m_node_id; + ProxyGraph* m_owner; + MemPool<VarNode> m_var_node_pool; + std::vector<std::unique_ptr<OperatorNodeBase>> m_opr_refkeeper; + CompNode::UnorderedSet m_used_comp_node; + VarReceiverInfo m_var_receiver_info; +public: + ~ProxyGraphImpl() { + mgb_assert(!m_owner->m_cur_opr); + if (is_finalized()) return; + for (auto&& i : m_used_comp_node) { + if (i.device_type() == CompNode::DeviceType::CUDA) continue; + i.sync(); + } + } + + ProxyGraphImpl(ProxyGraph* owner) : m_owner(owner) { + options().imperative_proxy_graph = true; + options().log_level = 0; + m_var_receiver_info.dev_value = 1; + m_var_receiver_info.allow_empty_value = 1; + } + + static std::unique_ptr<ProxyGraphImpl> make(ProxyGraph* owner) { + return std::make_unique<ProxyGraphImpl>(owner); + } + + void add_used_comp_node(CompNode cn) { + m_used_comp_node.insert(cn); + } + + bool invalid() const { + return is_finalized() || nr_oprs_in_graph() > m_owner->m_max_op_cnt; + } + + size_t next_node_id() override { + return m_node_id.fetch_add(1); + } + + void* alloc_varnode_storage() override { + return m_var_node_pool.alloc_raw(); + } + + void free_varnode_storage(void* ptr) override { + m_var_node_pool.free_raw(ptr); + } + + OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr_uniqp) override { + mgb_assert(!is_finalized()); + auto opr = opr_uniqp.get(); + + if (!opr->inserted_in_graph()) { + m_opr_refkeeper.emplace_back(std::move(opr_uniqp)); + opr->set_inserted_in_graph(); + opr->init_output_comp_node(); + opr->init_output_dtype(); + opr->init_output_format(); + } + return opr; + } + + cg::static_infer::StaticInferManager& static_infer_manager() override { + return *m_owner->m_static_infer_manager; + } + + cg::SeqCompNodeOptimizer& seq_comp_node_optimizer() override { + return *m_owner->m_seq_comp_node_optimizer; + } + + std::shared_ptr<void> on_comp_node_finalize() override { + // FIXME: mutex + mgb_assert(!m_owner->m_cur_opr); + // finalize would do sync first + m_opr_refkeeper.clear(); + return {}; + } + + const VarReceiverInfo& var_receiver_in_current_comp_seq( + const VarNode *var) const override { + return m_var_receiver_info; + } + + size_t nr_oprs_in_graph() const override {return m_opr_refkeeper.size();} + + std::unique_ptr<cg::AsyncExecutable> compile(const OutputSpec &out_spec) override {mgb_assert(0);} + SmallVector<std::unique_ptr<cg::AsyncExecutable>> compile_multi_part( + const SmallVector<OutputSpec>& out_specs) override {mgb_assert(0);} + cg::AsyncExecutable* current_comp_seq() override {mgb_assert(0);} + std::string get_mem_allocation_info() const override {mgb_assert(0);} + VarNode* find_var_by_id(size_t id) const override {mgb_assert(0);} + void share_device_memory_with(ComputingGraph &other) override {mgb_assert(0);} + void set_device_memory_allocator( + std::shared_ptr<DeviceMemoryAllocator> allocator) override {mgb_assert(0);} + size_t get_device_memory_size(CompNode cn) override {mgb_assert(0);} + size_t clear_device_memory() override {mgb_assert(0);} + void set_as_subgraph(ComputingGraph &par_graph) override {mgb_assert(0);} + void record_async_error(std::unique_ptr<MegBrainError> async_exc) override {mgb_assert(0);} +};
+std::atomic<size_t> ProxyGraph::ProxyGraphImpl::m_node_id = 0; + +ProxyGraph::ProxyGraph() : + m_graph(ProxyGraphImpl::make(this)), + m_env{new ExecEnv}, + m_static_infer_manager(new StaticInferManager(this)), + m_seq_comp_node_optimizer(new SeqCompNodeOptimizer()) { +} + +void ProxyGraph::reset() { + mgb_assert(!m_cur_opr); + m_graph = ProxyGraphImpl::make(this); +} + +ProxyGraph* ProxyGraph::get_default_graph() { + static thread_local ProxyGraph inst; + if (inst.m_graph->invalid()) { + inst.reset(); + } + return &inst; +} + +class ProxyGraph::CurOprGuard { +public: + CurOprGuard(ProxyGraph* owner, OperatorNodeBase* opr) : m_owner(owner) { + mgb_assert(!owner->m_cur_opr); + owner->m_cur_opr = opr; + } + CurOprGuard(const CurOprGuard&) = delete; + ~CurOprGuard() { + m_owner->cleanup(); + } +private: + ProxyGraph* m_owner; +}; + +#define CUR_OPR_GUARD(opr) CurOprGuard MGB_TOKENPASTE2(__cur_opr_guard_, __LINE__)(this, opr) + +/*********************** Physical Tensor Impl ***********************/ + +SmallVector<LogicalTensorDesc> ProxyGraph::infer_output_attrs( + const OpDef& opdef, + const SmallVector<Tensor*>& inputs) { + SmallVector<LogicalTensorDesc> ret; + CUR_OPR_GUARD(get_proxy_opr(opdef, inputs)); + do_shape_infer(true); + for (auto&& i: m_cur_opr->usable_output()) { + mgb_assert(i->dtype().valid() && i->comp_node().valid()); + mgb_assert(i->shape().ndim || i->contain_flag(VarNode::Flag::NO_SYS_MEM_ALLOC)); + ret.push_back({{i->shape(), i->dtype()}, i->comp_node()}); + } + return ret; +} + +void ProxyGraph::invoke_op(const OpDef& opdef, + const SmallVector<Tensor*>& inputs, + const SmallVector<Tensor*>& outputs) { + CUR_OPR_GUARD(get_proxy_opr(opdef, inputs)); + init_output_tensor(outputs); + for (auto oup : m_cur_opr->output()) { + m_graph->add_used_comp_node(oup->comp_node()); + } + m_cur_opr->execute(*m_env); +} + +void ProxyGraph::cleanup() { + if (m_cur_opr) { + for (auto&& i : m_cur_opr->input()) { + i->m_dev_tensor.storage({}); + } + for (auto&& i : m_cur_opr->output()) { + i->m_dev_tensor.storage({}); + } + m_static_infer_manager->clear(); + } + m_cur_opr = nullptr; +} + +void ProxyGraph::init_output_tensor(const SmallVector<Tensor*>& outputs) { + // get proxy opr + auto proxy = m_cur_opr; + + do_shape_infer(true); + + size_t j = 0; + for (auto&& var : proxy->output()) { + auto &&chk = var->m_mem_plan.reset_from_owner_var().chunk(); + if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) { + // alloc workspace + TensorLayout layout{var->shape(), var->dtype(), var->format()}; + DeviceTensorStorage storage; + storage.comp_node(var->comp_node()) + .ensure_size(layout.dtype.size(layout.total_nr_elems())); + var->m_dev_tensor.reset(storage, layout); + } else { + mgb_assert(j < outputs.size()); + auto &&tensor = outputs[j]; + auto &&layout = tensor->layout(); + mgb_assert(var->comp_node() == tensor->comp_node() && + var->shape().eq_shape(layout) && + var->dtype() == layout.dtype); + var->assign_dev_tensor_from_tensor(tensor->dev_tensor()); + ++ j; + } + chk.mem_alloc_status.set_from_owner_var(); + } + mgb_assert(j == outputs.size()); + + // Memory forwarding is bypassed in megbrain when the graph option + // imperative_proxy_graph is on; here we call mem_plan_fwd_in2out_readonly + // to initialize the internal state of some oprs (e.g. Subtensor) + // TODO: implement memory forwarding + proxy->mem_plan_fwd_in2out_readonly(); + { + // some oprs (e.g. Reduce) rely on on_mem_status_changed to set + // input/output tensors correctly; since we bypass var_node_mem_mgr, + // on_mem_status_changed should be called here + auto&& cb = proxy->get_opr_event_callback().on_mem_status_changed; + if (cb.valid()) { + cb.val()(); + } + } +}
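// infer_output_attrs and invoke_op above together implement one eager op
// application. A minimal sketch of a hypothetical caller (the real entry
// point, proxy_graph_detail::exec, appears later in this patch):
namespace {
[[maybe_unused]] SmallVector<TensorPtr> example_eager_apply(
        const OpDef& def, const SmallVector<Tensor*>& inputs) {
    auto* graph = ProxyGraph::get_default_graph();
    SmallVector<TensorPtr> outputs;
    SmallVector<Tensor*> raw_outputs;
    for (auto&& desc : graph->infer_output_attrs(def, inputs)) {
        // allocate each output on the comp node the proxy opr selected
        outputs.push_back(Tensor::make(desc.layout, desc.comp_node));
        raw_outputs.push_back(outputs.back().get());
    }
    graph->invoke_op(def, inputs, raw_outputs);
    return outputs;
}
} // anonymous namespace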
+cg::OperatorNodeBase* ProxyGraph::get_proxy_opr( + const OpDef& opdef, + const SmallVector<Tensor*>& inputs) { + VarNodeArray vinputs(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++ i) { + vinputs[i] = InputPlaceholder::make(*m_graph, *inputs[i]).node(); + } + auto opr = OpDef::apply_on_var_node(opdef, vinputs); + mgb_assert(opr->dyn_typeinfo() != InputPlaceholder::typeinfo()); + for (auto &&i : opr->input()) { + mgb_assert(i->owner_opr()->dyn_typeinfo() == + InputPlaceholder::typeinfo()); + } + return opr; +} + +/*********************** Logical Tensor Impl ***********************/ + +size_t ProxyGraph::get_opr_output_size(const OpDef& opdef, + const SmallVector<LogicalTensorDesc>& inputs) { + return get_proxy_opr(opdef, inputs)->usable_output().size(); +} + +SmallVector<LogicalTensorDesc> ProxyGraph::infer_output_attrs_fallible( + const OpDef& opdef, + const SmallVector<LogicalTensorDesc>& inputs) { + auto opr = get_proxy_opr(opdef, inputs); + CUR_OPR_GUARD(opr); + do_shape_infer(false); + SmallVector<LogicalTensorDesc> ret; + for (auto&& i : opr->usable_output()) { + ret.push_back({{i->shape(), i->dtype()}, i->comp_node()}); + } + return ret; +} + +struct ProxyGraph::GradGraph { + cg::VarNodeArray inputs; + cg::VarNodeArray outputs; + cg::VarNodeArray output_grads; + cg::VarNode* grad; +}; + +BackwardGraphResult +ProxyGraph::make_backward_graph( + const OpDef& opdef, + const SmallVector<LogicalTensorDesc>& input_descs, + const SmallVector<bool>& input_requires_grad, + const SmallVector<bool>& output_has_grad) { + ThinHashMap<VarNode*, size_t> var2idx; + auto push = [&var2idx, cnt=0](VarNode* var) mutable { + auto&& ret = var2idx.emplace(var, cnt ++); + mgb_assert(ret.second, "var %s has already been inserted", var->cname()); + return ret.first->second; + }; + auto inputs = make_input_place_holders(input_descs); + auto fwd = OpDef::apply_on_var_node(opdef, inputs); + auto&& outputs = fwd->usable_output(); + SmallVector<LogicalTensorDesc> output_descs; + for (auto&& i : outputs) { + output_descs.push_back({TensorLayout{i->dtype()}, i->comp_node()}); + } + auto output_grads = make_input_place_holders(output_descs); + mgb_assert(output_grads.size() == output_has_grad.size()); + bool any_output_has_grad = false; + for (size_t i = 0; i < output_grads.size(); ++ i) { + if (!output_has_grad[i]) { + output_grads[i] = nullptr; + } else { + any_output_has_grad = true; + } + } + if (!any_output_has_grad) { + return {}; + } + auto* gfunc = cg::lookup_grad_func(fwd->dyn_typeinfo()); + + BackwardGraphResult result; + auto&& backward = BackwardGraph::make(); + auto&& igraph = backward->cast_final_safe<BackwardGraph>().graph(); + + size_t nr_backward_graph_inputs = 0; + auto gen_expr = [this, &var2idx, &igraph, &push, &fwd, + &nr_backward_graph_inputs](cg::OperatorNodeBase* op) { + if (auto t = as_tensor(op)) { + mgb_assert(op->output().size() == 1); + igraph.constants.emplace_back(push(op->output(0)), std::move(t)); + } else if (op->same_type<InputPlaceholder>()) { + ++ nr_backward_graph_inputs; + push(op->output(0)); + } else { + std::vector<size_t> inputs, outputs; + for (auto &&i : op->input()) { + if (i->owner_opr() == fwd) { + if (var2idx.find(i) == var2idx.end()) { + ++ nr_backward_graph_inputs; + push(i); + } + } + inputs.push_back(var2idx.at(i)); + } + for (auto &&i : op->usable_output()) { + outputs.push_back(push(i)); + } + igraph.exprs.emplace_back(OpDef::make_from_op_node(op), inputs, outputs); + }
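        // Worked example of this encoding (indices are hypothetical): for an
        // Elemwise MUL forward z = x * y with placeholders pushed as x->0,
        // y->1 and the output grad pushed as dy->2, the grad expression
        // dx = dy * y is recorded as
        //     exprs == {(Elemwise{MUL}, inputs={2, 1}, outputs={3})}
        // and igraph.outputs will later reference node 3.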
}; + + // set backward graph outputs + cg::DepOprIter iter{gen_expr}; + iter.set_visited(fwd); + result.input_has_grad.resize(inputs.size()); + + VarNodeArray output_grads_with_unused_var; + { + auto iter = output_grads.begin(); + for (auto&& i : fwd->output()) { + if (i->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) { + // the var node with VOLATILE_CONTENT(e.g. workspace + // or an empty var) would not be considered as a normal + // output, so its grad is always NULL + output_grads_with_unused_var.push_back(nullptr); + } else { + output_grads_with_unused_var.push_back(*iter); + ++ iter; + } + } + mgb_assert(iter == output_grads.end()); + } + + Maybe grad_results; + for (size_t i = 0; i < inputs.size(); ++ i) { + VarNode* grad; + if (grad_results.valid()) { + grad = grad_results.val()[i]; + } else { + auto res = (*gfunc)(fwd, i, output_grads_with_unused_var); + if (res.from_single()) { + grad = res.single(); + } else { + grad_results.emplace(res.all(fwd)); + grad = grad_results.val()[i]; + } + } + if (grad && !grad->owner_opr()->same_type() + && input_requires_grad[i]) { + mgb_assert(!grad->owner_opr()->same_type(), + "gradient of operator %s w.r.t. input #%lu is " + "either not well defined or not implemented", + fwd->dyn_typeinfo()->name, i); + iter.add(grad); + igraph.outputs.push_back(var2idx.at(grad)); + result.input_has_grad[i] = true; + } else { + result.input_has_grad[i] = false; + } + } + if (igraph.outputs.empty()) { + return {}; + } + + // set backward graph inputs + igraph.inputs.reserve(nr_backward_graph_inputs); + result.save_for_backward.reserve(nr_backward_graph_inputs); + auto write_inputs = [&igraph, &var2idx, &result](const VarNodeArray& vars) { + for (auto&& i: vars) { + auto&& iter = var2idx.find(i); + if (iter != var2idx.end()) { + igraph.inputs.push_back(iter->second); + result.save_for_backward.push_back(true); + } else { + result.save_for_backward.push_back(false); + } + } + }; + write_inputs(inputs); + write_inputs(outputs); + write_inputs(output_grads); + mgb_assert(igraph.inputs.size() == nr_backward_graph_inputs); + + auto treat_as_single = [](auto&& igraph) { + if (igraph.exprs.size() != 1) + return false; + auto&& expr = igraph.exprs[0]; + auto&& expr_inputs = std::get<1>(expr); + if (expr_inputs.size() != igraph.inputs.size()) { + return false; + } + for (size_t i = 0; i < expr_inputs.size(); ++ i) { + if (igraph.inputs[i] != expr_inputs[i]) { + return false; + } + } + auto&& expr_outputs = std::get<2>(expr); + if (expr_outputs.size() != igraph.outputs.size()) { + return false; + } + for (size_t i = 0; i < expr_outputs.size(); ++ i) { + if (igraph.outputs[i] != expr_outputs[i]) { + return false; + } + } + return true; + }; + if (treat_as_single(igraph)) { + result.backward = std::get<0>(igraph.exprs[0]); + } else { + result.backward = backward; + } + return result; +} + +cg::OperatorNodeBase* ProxyGraph::get_proxy_opr(const OpDef& opdef, + const SmallVector& inputs) { + mgb_assert(!m_cur_opr); + auto vinputs = make_input_place_holders(inputs); + return OpDef::apply_on_var_node(opdef, vinputs); +} + +VarNodeArray ProxyGraph::make_input_place_holders(const SmallVector& inputs) { + VarNodeArray vinputs(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++ i) { + vinputs[i] = InputPlaceholder::make(*m_graph, inputs[i]).node(); + } + return vinputs; +} + +/*********************** Common Impl ***********************/ + +void ProxyGraph::do_shape_infer(bool sync_value) { + m_static_infer_manager->update(); + + for (auto* var : m_cur_opr->output()) { + if 
(sync_value) { + var->shape(m_static_infer_manager->infer_shape(var)); + } else if (auto* shape = m_static_infer_manager->infer_shape_fallible(var)) { + var->shape(*shape); + } + } +} + +TensorPtr ProxyGraph::as_tensor(cg::OperatorNodeBase* opr, bool share) { + // TODO : maybe some tensor should copy value from origin opr rather than + // share the RawStorage + mgb_assert(share, "can't share memory with opr %s", opr->cname()); + if (opr->same_type()) { + auto&& dv = opr->cast_final_safe().value(); + HostTensorND hv(dv.comp_node(), dv.shape(), dv.dtype()); + const DeviceTensorND* cpu_value; + // get host value + if (opr->owner_graph() == m_graph.get()) { + CUR_OPR_GUARD(opr); + m_static_infer_manager->update(); + cpu_value = m_static_infer_manager->infer_value_fallible(opr->output(0)); + } else { + cpu_value = opr->owner_graph()->static_infer_manager().infer_value_fallible(opr->output(0)); + } + mgb_assert(cpu_value); + mgb_assert(cpu_value->comp_node() == CompNode::default_cpu()); + // default_cpu is synchronous with respect to caller + hv.proxy_to_default_cpu().copy_from_fixlayout(*cpu_value); + return Tensor::make(dv, hv); + } else if (opr->same_type()) { + return Tensor::make(opr->cast_final_safe().get_dev_tensor()); + } else { + return {}; + } +} + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/proxy_graph.h b/imperative/src/impl/proxy_graph.h new file mode 100644 index 0000000000000000000000000000000000000000..c26cc32fb1c31ce199e1d32fde47c9ac8ba53b04 --- /dev/null +++ b/imperative/src/impl/proxy_graph.h @@ -0,0 +1,104 @@ +/** + * \file src/core/impl/imperative/proxy_graph.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#pragma once + +#include "megbrain/imperative.h" +#include "megbrain/graph/cg.h" +#include "megbrain/graph/grad_impl.h" +#include "megbrain/comp_node.h" + +#include "megbrain/imperative/ops/backward_graph.h" + +namespace mgb { +namespace imperative { + +class ProxyGraph : public NonCopyableObj { +public: + static ProxyGraph* get_default_graph(); + + /********************** Physical Tensor API **********************/ + + SmallVector infer_output_attrs( + const OpDef& opdef, + const SmallVector& inputs); + + void invoke_op( + const OpDef& opdef, + const SmallVector& inputs, + const SmallVector& outputs); + + BackwardGraphResult make_backward_graph( + const OpDef& opdef, + const SmallVector& input_descs, + const SmallVector& input_requires_grad, + const SmallVector& output_has_grad); + + /********************** Logical Tensor API **********************/ + + size_t get_opr_output_size( + const OpDef& opdef, + const SmallVector& inputs); + + SmallVector infer_output_attrs_fallible( + const OpDef& opdef, + const SmallVector& inputs); + +private: + ProxyGraph(); + + class ProxyGraphImpl; + class ExecEnv; + class StaticInferManager; + class SeqCompNodeOptimizer; + class InputPlaceholder; + struct ProxyGraphInst; + struct GradGraph; + struct CurOprGuard; + + void reset(); + + /********************** Physical Tensor Helper **********************/ + + void cleanup(); + + void init_output_tensor( + const SmallVector& outputs); + + cg::OperatorNodeBase* get_proxy_opr( + const OpDef& opdef, + const SmallVector& inputs); + + /********************** Logical Tensor Helper **********************/ + + cg::OperatorNodeBase* get_proxy_opr( + const OpDef& opdef, + const SmallVector& inputs); + + cg::VarNodeArray make_input_place_holders( + const SmallVector& inputs); + + /********************** Common Helper **********************/ + + void do_shape_infer(bool sync_value); + + TensorPtr as_tensor(cg::OperatorNodeBase* opr, bool share=true); + + cg::OperatorNodeBase* m_cur_opr = nullptr; + std::unique_ptr m_graph; + size_t m_max_op_cnt = 1000; + std::unique_ptr m_env; + std::unique_ptr m_static_infer_manager; + std::unique_ptr m_seq_comp_node_optimizer; +}; + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/proxy_graph_detail.cpp b/imperative/src/impl/proxy_graph_detail.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3fa962cf6c33d480e496b25f35acbe0fe60419e4 --- /dev/null +++ b/imperative/src/impl/proxy_graph_detail.cpp @@ -0,0 +1,124 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#include "./proxy_graph.h" +#include "./proxy_graph_detail.h" + +namespace mgb { +namespace imperative { +namespace proxy_graph_detail { + +namespace { +SmallVector<Tensor*> to_raw_ptr_array( + const SmallVector<TensorPtr>& inputs, + bool ensure_storage=true) { + SmallVector<Tensor*> ret; + for (auto&& i : inputs) { + mgb_assert(i); + ret.push_back(i.get()); + if (ensure_storage) { + // apply lazy allocation + i->blob()->storage(); + } + } + return ret; +} +} // anonymous namespace + +void exec(const OpDef& def, + const SmallVector<TensorPtr>& inputs_, + const SmallVector<TensorPtr>& outputs_) { + auto&& graph = ProxyGraph::get_default_graph(); + auto inputs = to_raw_ptr_array(inputs_), + outputs = to_raw_ptr_array(outputs_); + CompNode::UnorderedSet used_cns; + for (auto&& out: outputs) { + auto cn = out->comp_node(); + if (used_cns.insert(cn).second) { + for (auto&& in: inputs) { + if (in->comp_node() != cn) { + auto&& e = in->get_or_create_event(); + e->device_wait_by(cn); + } + } + } + } + graph->invoke_op(def, inputs, outputs); + for (auto&& cn: used_cns) { + for (auto&& in: inputs) { + if (in->comp_node() != cn) { + in->add_release_callback(cn); + } + } + } +} + +SmallVector<LogicalTensorDesc> infer_output_attrs(const OpDef& def, + const SmallVector<TensorPtr>& inputs) { + auto&& graph = ProxyGraph::get_default_graph(); + return graph->infer_output_attrs(def, to_raw_ptr_array(inputs)); +} + +SmallVector<LogicalTensorDesc> +infer_output_attrs_fallible(const OpDef& def, + const SmallVector<LogicalTensorDesc>& inputs) { + auto&& graph = ProxyGraph::get_default_graph(); + return graph->infer_output_attrs_fallible(def, inputs); +} + +namespace { + +size_t get_backward_graph_hash_key(const OpDef& def, + const SmallVector<LogicalTensorDesc>& inputs, + const SmallVector<bool>& input_requires_grad, + const SmallVector<bool>& output_has_grad) { + XXHash state; + size_t length = 0, data[3 + 2 * inputs.size()]; + data[length ++] = def.hash(); + for (auto &&i : inputs) { + data[length ++] = mgb::hash(i.layout.dtype.handle()); + data[length ++] = mgb::hash(i.comp_node); + } + data[length ++] = mgb::hash(input_requires_grad); + data[length ++] = mgb::hash(output_has_grad); + mgb_assert(length == 3 + 2 * inputs.size()); + state.update(data, length * sizeof(size_t)); + return state.digest(); +} + +struct BackwardGraphCache : std::unordered_map<size_t, BackwardGraphResult>, CompNodeDepedentObject { + std::shared_ptr<void> on_comp_node_finalize() override { + clear(); + return {}; + } +} backward_graph_cache; + +} // anonymous namespace + +BackwardGraphResult +make_backward_graph(const OpDef& def, + const SmallVector<LogicalTensorDesc>& inputs, + const SmallVector<bool>& input_requires_grad, + const SmallVector<bool>& output_has_grad) { + auto&& graph = ProxyGraph::get_default_graph(); + auto hash_key = get_backward_graph_hash_key(def, inputs, input_requires_grad, output_has_grad); + auto&& iter = backward_graph_cache.find(hash_key); + if (iter != backward_graph_cache.end()) { + return iter->second; + } + auto res = graph->make_backward_graph(def, inputs, input_requires_grad, output_has_grad); + backward_graph_cache.emplace(hash_key, res); + return res; +} + +} // namespace proxy_graph_detail +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} \ No newline at end of file
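get_backward_graph_hash_key and BackwardGraphCache above memoize backward-graph construction per (op hash, input dtypes and comp nodes, grad masks). A minimal sketch of a hypothetical caller, assuming an OpDef `def` and input descriptors `descs` obtained from infer_output_attrs_fallible:

    // hypothetical usage, not part of this patch
    auto bg = proxy_graph_detail::make_backward_graph(
            def, descs,
            /*input_requires_grad=*/{true, true},
            /*output_has_grad=*/{true});
    if (bg.backward) {
        // bg.save_for_backward flags which of (inputs, outputs, output_grads)
        // must be kept alive for the backward pass; a repeated call with the
        // same signature returns the cached BackwardGraphResult
    }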
diff --git a/imperative/src/impl/proxy_graph_detail.h b/imperative/src/impl/proxy_graph_detail.h new file mode 100644 index 0000000000000000000000000000000000000000..16c05a6e339ac121d289be00cdc08e3742087392 --- /dev/null +++ b/imperative/src/impl/proxy_graph_detail.h @@ -0,0 +1,39 @@ +/** + * \file imperative/src/impl/proxy_graph_detail.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb { +namespace imperative { +namespace proxy_graph_detail { + +void exec(const OpDef& def, + const SmallVector<TensorPtr>& inputs_, + const SmallVector<TensorPtr>& outputs_); + +SmallVector<LogicalTensorDesc> infer_output_attrs(const OpDef& def, + const SmallVector<TensorPtr>& inputs); + +SmallVector<LogicalTensorDesc> +infer_output_attrs_fallible(const OpDef& def, + const SmallVector<LogicalTensorDesc>& inputs); + +BackwardGraphResult +make_backward_graph(const OpDef& def, + const SmallVector<LogicalTensorDesc>& inputs, + const SmallVector<bool>& input_requires_grad, + const SmallVector<bool>& output_has_grad); + +} // namespace proxy_graph_detail +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} \ No newline at end of file diff --git a/imperative/src/include/megbrain/imperative.h b/imperative/src/include/megbrain/imperative.h new file mode 100644 index 0000000000000000000000000000000000000000..ac3dceda7d5be597497f794b9fe4342a71f55b3a --- /dev/null +++ b/imperative/src/include/megbrain/imperative.h @@ -0,0 +1,16 @@ +/** + * \file imperative/src/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/physical_tensor.h" +#include "megbrain/imperative/op_def.h" +#include "megbrain/imperative/opdef/all.h" + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/include/megbrain/imperative/blob_manager.h b/imperative/src/include/megbrain/imperative/blob_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..61dbd540d9c57bcf0d857cb39692104a4839aeb2 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/blob_manager.h @@ -0,0 +1,35 @@ +/** + * \file imperative/src/include/megbrain/imperative/blob_manager.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ * + */ + +#pragma once + +#include "megbrain/imperative/physical_tensor.h" + +namespace mgb { +namespace imperative { + +class BlobManager : public NonCopyableObj { +public: + virtual ~BlobManager() = default; + + static BlobManager* inst(); + + virtual void alloc_with_defrag(Blob* blob, size_t size) = 0; + + virtual void register_blob(Blob* blob) = 0; + + virtual void unregister_blob(Blob* blob) = 0; + + virtual void set_enable(bool flag) = 0; + + virtual void defrag(const CompNode& cn) = 0; +}; + +} // namespace imperative +} // namespace mgb diff --git a/imperative/src/include/megbrain/imperative/interpreter.h b/imperative/src/include/megbrain/imperative/interpreter.h new file mode 100644 index 0000000000000000000000000000000000000000..c9124a0c5a0d571844b3473e76849e8944e7b51a --- /dev/null +++ b/imperative/src/include/megbrain/imperative/interpreter.h @@ -0,0 +1,39 @@ +#include + +#include "megbrain/imperative/op_def.h" + +namespace mgb::imperative::interpreter { + +struct Interpreter { + using Handle = void*; + + struct Channel { + virtual ~Channel() = default; + + virtual Handle put(const HostTensorND& value) = 0; + + virtual void del(Handle) = 0; + + virtual SmallVector apply_op( + std::shared_ptr op, + const SmallVector& inputs) = 0; + + virtual HostTensorND get_value(Handle) = 0; + virtual TensorShape get_shape(Handle) = 0; + virtual DType get_dtype(Handle) = 0; + virtual CompNode get_device(Handle) = 0; + + virtual DeviceTensorND get_dev_tensor(Handle) = 0; + + virtual void sync() = 0; + virtual void close() = 0; + + virtual void config_async_level(int level) = 0; + }; + + virtual std::unique_ptr create_channel() = 0; + + static Interpreter& inst(); +}; + +} // namespace mgb::imperative::interpreter diff --git a/imperative/src/include/megbrain/imperative/op_def.h b/imperative/src/include/megbrain/imperative/op_def.h new file mode 100644 index 0000000000000000000000000000000000000000..f742fc6bfa25a54dc443e0e2df20a835745eb067 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/op_def.h @@ -0,0 +1,91 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#pragma once + +#include "megbrain/graph.h" +#include "megbrain/imperative/physical_tensor.h" + +namespace mgb { +namespace imperative { + +class OpDef; +struct OpTrait; + +struct BackwardGraphResult { + std::shared_ptr<OpDef> backward; + std::vector<bool> save_for_backward; + std::vector<bool> input_has_grad; +}; + +class OpDef : public Hashable { + mutable const OpTrait* m_trait = nullptr; +public: + virtual ~OpDef() = default; + + virtual std::shared_ptr<OpDef> copy() const = 0; + + static std::shared_ptr<OpDef> make_from_op_node( + cg::OperatorNodeBase* node); + + static SmallVector<TensorPtr> apply_on_physical_tensor( + const OpDef& def, + const SmallVector<TensorPtr>& inputs); + + static void exec( + const OpDef& def, + const SmallVector<TensorPtr>& inputs, + const SmallVector<TensorPtr>& outputs); + + static cg::OperatorNodeBase* apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs); + + static SmallVector<LogicalTensorDesc> infer_output_attrs_fallible( + const OpDef& def, + const SmallVector<LogicalTensorDesc>& inputs); + + static SmallVector<LogicalTensorDesc> infer_output_attrs( + const OpDef& def, + const SmallVector<TensorPtr>& inputs); + + static BackwardGraphResult make_backward_graph( + const OpDef& def, + const SmallVector<LogicalTensorDesc>& inputs, + const SmallVector<bool>& input_requires_grad, + const SmallVector<bool>& output_has_grad); + + const OpTrait* trait() const; + + virtual size_t hash() const { + mgb_throw(MegBrainError, "not implemented"); + } + + virtual bool is_same_st(const Hashable&) const { + mgb_throw(MegBrainError, "not implemented"); + } +}; + +template <typename T> +class OpDefImplBase : public OpDef { +public: + virtual std::shared_ptr<OpDef> copy() const override { + return std::shared_ptr<OpDef>(new T(this->cast_final_safe<T>())); + } + + template <typename ...Args> + static std::shared_ptr<T> make(const Args& ...args) { + return std::shared_ptr<T>(new T(args...)); + } +}; + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/include/megbrain/imperative/opr_utility.h b/imperative/src/include/megbrain/imperative/opr_utility.h new file mode 100644 index 0000000000000000000000000000000000000000..f2c94dfe73df03968c47c914c8f236b31ce5abc4 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/opr_utility.h @@ -0,0 +1,95 @@ +/** + * \file imperative/src/include/megbrain/imperative/opr_utility.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/graph.h" +#include "megbrain/graph/event.h" +#include "megbrain/opr/internal/identical_fwd.h" +#include "megbrain/opr/internal/param_tag_defs.h" +#include "megbrain/opr/internal/megdnn_opr_wrapper.h" +#include "megbrain/opr/param_defs.h" + +#include "megdnn/oprs/utils.h" + +namespace mgb { +namespace opr { +/* + * InputCallback, OutputCallback, NopCallback + * Intended for runtime data exchange with Python.
+ */ + +MGB_DEFINE_OPR_CLASS(InputCallback, cg::SingleCNOperatorNodeBase) // { +public: + using callback_t = thin_function; + InputCallback(cg::ComputingGraph& graph, + callback_t callback, + const VarNodeArray& inputs, + const OperatorNodeConfig &config); + static SymbolVarArray make(cg::ComputingGraph& graph, + callback_t callback, + CompNode comp_node, + DType dtype, + const SymbolVarArray& inputs = {}); +protected: + void scn_do_execute() override; + void init_output_static_infer_desc() override; + NodeProp* do_make_node_prop() const override; +private: + callback_t m_callback; +}; + +MGB_DEFINE_OPR_CLASS(OutputCallback, cg::SingleCNOperatorNodeBase) // { +public: + using callback_t = thin_function; + struct Param { + callback_t callback; + bool borrow = false; + }; + OutputCallback(Param param, + const VarNodeArray& inputs, + const OperatorNodeConfig &config); + static SymbolVar make(Param param, + const SymbolVarArray& inputs); + static SymbolVar make(Param param, + SymbolVar input) { + return make(std::move(param), SymbolVarArray{input}); + } +protected: + void scn_do_execute() override; + void init_output_static_infer_desc() override; + NodeProp* do_make_node_prop() const override; +private: + Param m_param; +}; + +MGB_DEFINE_OPR_CLASS(NopCallback, cg::OperatorNodeBase) // { +public: + using callback_t = thin_function; + NopCallback(cg::ComputingGraph& graph, + callback_t callback, + const VarNodeArray& inputs, + const OperatorNodeConfig &config); + static SymbolVar make(cg::ComputingGraph& graph, + callback_t callback, + CompNode comp_node, + const SymbolVarArray& inputs = {}); +protected: + void do_execute(ExecEnv &env) override; + void init_output_static_infer_desc() override; + void init_output_comp_node() override; + void on_output_comp_node_stream_changed() override; + NodeProp* do_make_node_prop() const override; +private: + callback_t m_callback; +}; +} // namespace opr +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/include/megbrain/imperative/ops/backward_graph.h b/imperative/src/include/megbrain/imperative/ops/backward_graph.h new file mode 100644 index 0000000000000000000000000000000000000000..4f5124f180e683a0582b85a8e3de864738cca493 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/ops/backward_graph.h @@ -0,0 +1,58 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb { +namespace imperative { + +// a special OpDef used for taking gradients on physical tensors +struct BackwardGraph final : public OpDefImplBase<BackwardGraph> { + MGB_DYN_TYPE_OBJ_FINAL_DECL; +public: + struct InternalGraph { + // op, inputs, outputs + using Expr = std::tuple<std::shared_ptr<OpDef>, + std::vector<size_t>, std::vector<size_t>>; + std::vector<Expr> exprs; + + // index array of input nodes + std::vector<size_t> inputs; + + // index array of output nodes + std::vector<size_t> outputs; + + // pair of (node index, corresponding constant) + std::vector<std::pair<size_t, TensorPtr>> constants; + + SmallVector<TensorPtr> + apply(const SmallVector<TensorPtr>& inputs) const; + + SmallVector<LogicalTensorDesc> + infer_attrs(const SmallVector<LogicalTensorDesc>& inputs) const; + }; + + const InternalGraph& graph() const { + return m_graph; + } + + InternalGraph& graph() { + return m_graph; + } + +private: + InternalGraph m_graph; +}; + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/include/megbrain/imperative/ops/collective_comm.h b/imperative/src/include/megbrain/imperative/ops/collective_comm.h new file mode 100644 index 0000000000000000000000000000000000000000..4d6c515b7cfff5474e4c21a26540df0826bd64f8 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/ops/collective_comm.h @@ -0,0 +1,56 @@ +/** + * \file imperative/src/include/megbrain/imperative/ops/collective_comm.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" +#include "megbrain/opr/param_defs.h" + +namespace mgb { +namespace imperative { + +class CollectiveComm : public OpDefImplBase<CollectiveComm> { + MGB_DYN_TYPE_OBJ_FINAL_DECL; + +public: + CollectiveComm() = default; + CollectiveComm(const std::string& key_, size_t nr_devices_, + uint32_t rank_, bool is_root_, bool local_grad_, + const std::string& addr_, uint32_t port_, + const megdnn::param::CollectiveComm::Mode& mode_, + const DType& dtype_, const std::string& backend_, + const std::string& comp_node_) + : key(key_), + nr_devices(nr_devices_), + rank(rank_), + is_root(is_root_), + local_grad(local_grad_), + addr(addr_), + port(port_), + mode(mode_), + dtype(dtype_), + backend(backend_), + comp_node(comp_node_) {} + std::string key; + size_t nr_devices; + uint32_t rank; + bool is_root; + bool local_grad; + std::string addr; + uint32_t port; + megdnn::param::CollectiveComm::Mode mode; + DType dtype; + std::string backend; + std::string comp_node; +}; + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
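CollectiveComm above is a plain parameter pack; OpDefImplBase::make simply forwards these constructor arguments. A minimal sketch of building an all-reduce op for rank 0 of a two-GPU group (all values are illustrative; the test in imperative/src/test/collective_comm.cpp later in this patch drives the same constructor):

    auto op = CollectiveComm::make(
            "all_reduce", /*nr_devices=*/2, /*rank=*/0, /*is_root=*/true,
            /*local_grad=*/false, /*addr=*/"127.0.0.1", /*port=*/3456,
            megdnn::param::CollectiveComm::Mode::ALL_REDUCE_SUM,
            dtype::Float32(), /*backend=*/"nccl", /*comp_node=*/"");
    // the result is an ordinary OpDef and can be passed to
    // OpDef::apply_on_physical_tensor like any other op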
diff --git a/imperative/src/include/megbrain/imperative/ops/cond_take.h b/imperative/src/include/megbrain/imperative/ops/cond_take.h new file mode 100644 index 0000000000000000000000000000000000000000..64cdce0e8b044d2956dfd0ad88135f5448ac06a7 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/ops/cond_take.h @@ -0,0 +1,22 @@ +/** + * \file imperative/src/include/megbrain/imperative/ops/cond_take.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb::imperative { + +class CondTake : public OpDefImplBase<CondTake> { + MGB_DYN_TYPE_OBJ_FINAL_DECL; +public: + CondTake() = default; +}; + +} // namespace mgb::imperative diff --git a/imperative/src/include/megbrain/imperative/ops/io_remote.h b/imperative/src/include/megbrain/imperative/ops/io_remote.h new file mode 100644 index 0000000000000000000000000000000000000000..83e5867338a4486f29a4a77cc7e847da73bca303 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/ops/io_remote.h @@ -0,0 +1,61 @@ +/** + * \file imperative/src/include/megbrain/imperative/ops/io_remote.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb { +namespace imperative { + +class RemoteSend : public OpDefImplBase<RemoteSend> { + MGB_DYN_TYPE_OBJ_FINAL_DECL; + +public: + RemoteSend() = default; + RemoteSend(const std::string& key_, const std::string& addr_, + uint32_t port_, uint32_t rank_to_) + : key(key_), + addr(addr_), + port(port_), + rank_to(rank_to_) {} + std::string key; + std::string addr; + uint32_t port; + uint32_t rank_to; +}; + +class RemoteRecv : public OpDefImplBase<RemoteRecv> { + MGB_DYN_TYPE_OBJ_FINAL_DECL; + +public: + RemoteRecv() = default; + RemoteRecv(const std::string& key_, const std::string& addr_, + uint32_t port_, uint32_t rank_from_, TensorShape shape_, + CompNode cn_, const DType& dtype_) + : key(key_), + addr(addr_), + port(port_), + rank_from(rank_from_), + cn(cn_), + shape(shape_), + dtype(dtype_) {} + std::string key; + std::string addr; + uint32_t port; + uint32_t rank_from; + CompNode cn; + TensorShape shape; + DType dtype; +}; + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/include/megbrain/imperative/ops/nms.h b/imperative/src/include/megbrain/imperative/ops/nms.h new file mode 100644 index 0000000000000000000000000000000000000000..80fcc642ef1478b4d270f9bcfcb3bb5b18b74c14 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/ops/nms.h @@ -0,0 +1,26 @@ +/** + * \file imperative/src/include/megbrain/imperative/ops/nms.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb::imperative { + +class NMSKeep : public OpDefImplBase<NMSKeep> { + MGB_DYN_TYPE_OBJ_FINAL_DECL; +public: + float iou_thresh; //!< IoU threshold for overlapping + uint32_t max_output; //!< max number of output boxes per batch + NMSKeep() = default; + NMSKeep(float iou_thresh_, uint32_t max_output_): + iou_thresh(iou_thresh_), max_output(max_output_) {} +}; + +} // namespace mgb::imperative diff --git a/imperative/src/include/megbrain/imperative/ops/opr_attr.h b/imperative/src/include/megbrain/imperative/ops/opr_attr.h new file mode 100644 index 0000000000000000000000000000000000000000..5c8aa03a3803d893d498ed85d49368f6a2c5876a --- /dev/null +++ b/imperative/src/include/megbrain/imperative/ops/opr_attr.h @@ -0,0 +1,53 @@ +/** + * \file imperative/src/include/megbrain/imperative/ops/opr_attr.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb { +namespace imperative { + +struct OprAttr : public OpDefImplBase { + MGB_DYN_TYPE_OBJ_FINAL_DECL; +public: + using Type = std::string; + struct Param : public std::vector { + template + void write_pod(const T& data) { + static_assert(!std::is_pointer::value && is_location_invariant::value); + const char* ptr = static_cast(static_cast(&data)); + insert(end(), ptr, ptr + sizeof(T)); + } + template + void write_pod(const T& data, const Args& ...args) { + write_pod(data); + write_pod(args...); + } + }; + + Type type; + Param param; + cg::OperatorNodeConfig config; + + OprAttr() = default; + OprAttr(const Type& t): type(t){} + OprAttr(const Type& t, const Param& p, const cg::OperatorNodeConfig& c): + type(t), param(p), config(c) {} + + std::string repr() const; + + bool is_same_st(const Hashable& rhs) const; + size_t hash() const; +}; + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/include/megbrain/imperative/ops/tensor_manip.h b/imperative/src/include/megbrain/imperative/ops/tensor_manip.h new file mode 100644 index 0000000000000000000000000000000000000000..c559df1cfa41076959e650484e54926362c74644 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/ops/tensor_manip.h @@ -0,0 +1,56 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/imperative/op_def.h" + +namespace mgb::imperative { + +class GetVarShape : public OpDefImplBase { + MGB_DYN_TYPE_OBJ_FINAL_DECL; +public: + GetVarShape() = default; + + size_t hash() const override { + return reinterpret_cast(dyn_typeinfo()); + } + + bool is_same_st(const Hashable& rhs) const override { + return rhs.dyn_typeinfo() == dyn_typeinfo(); + } +}; + +class ParamPackSplit : public OpDefImplBase { + MGB_DYN_TYPE_OBJ_FINAL_DECL; + +public: + ParamPackSplit() = default; + + ParamPackSplit(std::vector& offsets_, + std::vector>& shapes_) + : offsets(offsets_), shapes(shapes_) {} + + std::vector offsets; + std::vector> shapes; +}; + +class ParamPackConcat : public OpDefImplBase { + MGB_DYN_TYPE_OBJ_FINAL_DECL; + +public: + ParamPackConcat() = default; + + ParamPackConcat(std::vector& offsets_) + : offsets(offsets_) {} + + std::vector offsets; +}; + +} // namespace mgb::imperative diff --git a/imperative/src/include/megbrain/imperative/physical_tensor.h b/imperative/src/include/megbrain/imperative/physical_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..757b84555aeea7d82621f477acd62e90fa594310 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/physical_tensor.h @@ -0,0 +1,138 @@ +/** + * \file src/core/include/megbrain/imperative.h + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#pragma once + +#include +#include + +#include "megbrain/tensor.h" + +namespace mgb { +namespace imperative { + +/************************** Tensor *****************************/ +class Blob; +using BlobPtr = std::shared_ptr; + +class BlobManagerImpl; + +class Blob : public NonCopyableObj { +public: + Blob(const DeviceTensorStorage& s); + Blob(CompNode cn, size_t sz); + ~Blob(); + + template + static BlobPtr make(Args&& ...args) { + return std::make_shared(std::forward(args)...); + } + + using RawStorage = DeviceTensorStorage::RawStorage; + const RawStorage& storage(); + + const CompNode& comp_node() const { + return m_comp_node; + } + + size_t size() const { + return m_size; + } +private: + friend class BlobManagerImpl; + CompNode m_comp_node; + mutable RawStorage m_storage; + size_t m_size = 0; +}; + +struct EventDeleter { + void operator()(CompNode::Event*); +}; +using EventPtr = std::unique_ptr; + +class Tensor; +using TensorPtr = std::shared_ptr; +class Tensor : public NonCopyableObj { +public: + Tensor() = default; + Tensor(BlobPtr blob, const TensorLayout& layout, size_t offset = 0, const HostTensorND& hv = {}); + Tensor(BlobPtr blob, const TensorLayout& layout, const HostTensorND& hv = {}) + : Tensor(std::move(blob), layout, 0, hv) {}; + Tensor(const HostTensorND &hv); + Tensor(const DeviceTensorND &dv, const HostTensorND& hv = {}); + Tensor(const TensorLayout& layout, const CompNode& cn); + Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout); + + static TensorPtr make(const HostTensorND& hv); + + template, HostTensorND>>> + static TensorPtr make(T&& hv) { + TensorPtr (*f)(const HostTensorND&) = &make; + return f(std::forward(hv)); + }; + + template + static TensorPtr make(Args&& ...args) { + return std::make_shared(std::forward(args)...); + } + + CompNode comp_node() const { + mgb_assert(m_blob, "uninitialized tensor."); + return m_blob->comp_node(); + } + + TensorLayout layout() const { + return m_layout; + } + + DeviceTensorND dev_tensor(); + + static TensorPtr make_scalar(DTypeScalar value, CompNode cn); + + TensorPtr make_scalar(DTypeScalar value) const { + mgb_assert(m_blob, "uninitialized tensor."); + return make_scalar(value, m_blob->comp_node()); + } + + BlobPtr& blob() { + return m_blob; + } + + void fetch_value(); + bool value_fetched(); + TensorPtr sub(size_t offset, TensorShape shape); + + // m_value is set once readonly afterwards + // so the return value is thread safe + const HostTensorND& get_value(); + // return a pointer instead of a reference to ensure thread safety + const HostTensorND* try_get_value(); + + void add_release_callback(CompNode cn); + CompNode::Event* get_or_create_event(); +private: + + TensorLayout m_layout; + BlobPtr m_blob; + size_t m_offset; + std::mutex m_mtx; + HostTensorND m_value; + EventPtr m_value_ready = nullptr; +}; + +struct LogicalTensorDesc { + TensorLayout layout; + CompNode comp_node; + DeviceTensorND value; // cpu:default +}; + +} // namespace imperative +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/include/megbrain/imperative/profiler.h b/imperative/src/include/megbrain/imperative/profiler.h new file mode 100644 index 0000000000000000000000000000000000000000..a223ab37495060b2241f232974e37759fe9c9242 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/profiler.h @@ -0,0 +1,52 @@ +/** + * \file src/core/include/megbrain/profiler.h + * + * This file is part of MegBrain, a deep learning framework developed by 
Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#pragma once + +#include "megbrain/comp_node.h" +#include "megbrain/graph/event.h" +#include "megbrain/utils/json.h" +#include "megbrain/utils/timer.h" + +#include "megbrain/imperative/op_def.h" + +namespace mgb { +namespace imperative { + +class ProfilerPrivate; + +using OpDefPrinter = thin_function; + +class Profiler { +private: + std::unique_ptr m_private; + +public: + enum EventKind { OprBegin, OprEnd }; + +public: + Profiler(); + Profiler(const std::string& path); + ~Profiler(); + void enable(); + void disable(); + void dump(); + void dump(const std::string& path); + void record_host(size_t id, std::string name, EventKind type, + double host_time); + void record_device(size_t id, std::string name, EventKind type, + double host_time, CompNode comp_node); + double get_device_time(CompNode::Event& event); + size_t get_dump_count(); + std::unique_ptr create_event(CompNode comp_node); + double get_host_time_now(); + std::string print_op(const OpDef& def); +}; +} // namespace imperative +} // namespace mgb diff --git a/imperative/src/test/backward_graph.cpp b/imperative/src/test/backward_graph.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f83058fc2c6a8c370648e340dc63886f7c93fc58 --- /dev/null +++ b/imperative/src/test/backward_graph.cpp @@ -0,0 +1,145 @@ +/** + * \file imperative/src/test/backward_graph.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include "./helper.h" +#include "megbrain/opr/basic_arith.h" +#include "megbrain/opr/dnn/batch_norm.h" +#include "megbrain/imperative/ops/opr_attr.h" + +using namespace mgb; +using namespace cg; +using namespace imperative; + +TEST(TestImperative, BackwardGraphBasic) { + HostTensorGenerator<> gen; + SmallVector hvs; + SmallVector inputs; + for(size_t i = 0; i < 2; ++ i) { + hvs.push_back(*gen({42})); + inputs.push_back(Tensor::make(hvs.back())); + } + + using Param = opr::Elemwise::Param; + Param param{Param::Mode::MUL}; + OprAttr attr{"Elemwise", {}, {}}; + attr.param.write_pod(param); + + SmallVector input_descs; + for (auto&& i : inputs) { + input_descs.push_back({i->layout(), i->comp_node()}); + } + auto result = OpDef::make_backward_graph(attr, input_descs, {true, true}, {true}); + auto&& save_for_backward = result.save_for_backward; + auto&& input_has_grad = result.input_has_grad; + + auto outputs = OpDef::apply_on_physical_tensor(attr, inputs); + inputs.push_back(outputs[0]); + hvs.push_back(*gen({42})); + inputs.push_back(Tensor::make(hvs.back())); + mgb_assert(save_for_backward.size() == inputs.size()); + for (size_t i = 0; i < inputs.size(); ++ i) { + if (!save_for_backward[i]) { + inputs[i].reset(); // drop unused tensor + } + } + SmallVector backward_graph_inputs; + for (auto&& i : inputs) { + if (i) { + backward_graph_inputs.push_back(i); + } + } + inputs.clear(); + auto input_grads = OpDef::apply_on_physical_tensor(*(result.backward), backward_graph_inputs); + mgb_assert(input_grads.size() == input_has_grad.size()); + for (size_t i = 0; i < input_has_grad.size(); ++ i) { + mgb_assert(input_has_grad[i] == static_cast(input_grads[i])); + } + + SmallVector res; + for (auto&& i : input_grads) { + res.emplace_back(); + res.back().copy_from(i->dev_tensor()).sync(); + } + for (size_t i = 0; i < 42; ++ i) { + for (size_t j = 0; j < 1; ++ j) { + ASSERT_EQ(hvs[2].ptr()[i] * hvs[j].ptr()[i], 
res[j ^ 1].ptr()[i]); + } + } +} + +TEST(TestImperative, BackwardGraphIdentity) { + HostTensorGenerator<> gen; + auto host_a = gen({42}), host_dc = gen({42}); + auto a = Tensor::make(*host_a), dc = Tensor::make(*host_dc); + SmallVector inputs; + inputs.push_back(a); + + OprAttr attr{"Identity", {}, {}}; + attr.param.write_pod({}); + + SmallVector input_descs; + input_descs.push_back({a->layout(), a->comp_node()}); + auto result = OpDef::make_backward_graph(attr, input_descs, {true}, {true}); + auto&& save_for_backward = result.save_for_backward; + auto&& input_has_grad = result.input_has_grad; + + auto outputs = OpDef::apply_on_physical_tensor(attr, inputs); + inputs.push_back(outputs[0]); + inputs.push_back(dc); + mgb_assert(save_for_backward.size() == inputs.size()); + for (size_t i = 0; i < inputs.size(); ++ i) { + if (!save_for_backward[i]) { + inputs[i].reset(); // drop unused tensor + } + } + SmallVector backward_graph_inputs; + for (auto&& i : inputs) { + if (i) { + backward_graph_inputs.push_back(i); + } + } + inputs.clear(); + auto input_grads = OpDef::apply_on_physical_tensor(*(result.backward), backward_graph_inputs); + mgb_assert(input_grads.size() == input_has_grad.size()); + for (size_t i = 0; i < input_has_grad.size(); ++ i) { + mgb_assert(input_has_grad[i] == static_cast(input_grads[i])); + } + + HostTensorND hv; + hv.copy_from(input_grads[0]->dev_tensor()).sync(); + for (size_t i = 0; i < 42; ++ i) { + ASSERT_EQ(host_dc->ptr()[i], hv.ptr()[i]); + } +} + +TEST(TestImperative, BatchNormGrad) { + auto cn = CompNode::load("xpux"); + using Param = opr::BatchNorm::Param; + size_t N=2, C=3, H=5, W=5; + LogicalTensorDesc inp{TensorLayout{{N, C, H, W}, dtype::Float32()}, cn}; + LogicalTensorDesc stat{TensorLayout{{C}, dtype::Float32()}, cn}; + { + auto op = OprAttr::make("BatchNorm"); + auto&& attr = op->cast_final_safe(); + Param param; + param.fwd_mode = Param::FwdMode::TRAINING; + attr.param.write_pod(param); + OpDef::make_backward_graph(attr, {inp, stat, stat, stat, stat}, + {true, true ,true, false, false}, {false, false, false, false, true}); + } + { + auto op = OprAttr::make("BatchNorm"); + auto&& attr = op->cast_final_safe(); + Param param; + param.fwd_mode = Param::FwdMode::TRAINING; + attr.param.write_pod(param); + OpDef::make_backward_graph(attr, {inp, stat, stat}, + {true, true ,true}, {false, false, true}); + } +} diff --git a/imperative/src/test/collective_comm.cpp b/imperative/src/test/collective_comm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b1a1c9ad76fbfebfe91b3935f042715287446ba6 --- /dev/null +++ b/imperative/src/test/collective_comm.cpp @@ -0,0 +1,51 @@ +/** + * \file imperative/src/test/imperative.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#include "./helper.h" +#include "megbrain/imperative/ops/collective_comm.h" +#include "megbrain/opr/mm_handler.h" + +using namespace mgb; +using namespace imperative; + +TEST(TestImperative, AllReduceBasic) { + REQUIRE_GPU(2); + const char* server_addr = "127.0.0.1"; + uint32_t port = 3456; + mgb_assert(create_zmqrpc_server(server_addr, port) > 0); + HostTensorGenerator<> gen; + CompNode cn0 = CompNode::load("gpu0"), + cn1 = CompNode::load("gpu1"); + + auto host_x = gen({233}, cn0), host_y = gen({233}, cn1); + auto expect = gen({233}); + for (size_t i = 0; i < 233; ++ i) { + expect->ptr()[i] = host_x->ptr()[i] + host_y->ptr()[i]; + } + + auto run = [&](std::shared_ptr hnd, uint32_t idx) { + imperative::CollectiveComm + def{"all_reduce", 2, idx, idx==0, false, server_addr, port, + megdnn::param::CollectiveComm::Mode::ALL_REDUCE_SUM, + dtype::Float32(), "nccl", ""}; + auto inp = Tensor::make(*hnd); + auto oup = OpDef::apply_on_physical_tensor(def, {inp}); + HostTensorND host_v; + host_v.copy_from(oup[0]->dev_tensor()).sync(); + MGB_ASSERT_TENSOR_NEAR(*expect, host_v, 1e-6); + }; + + std::thread t0(std::bind(run, host_x, 0)); + std::thread t1(std::bind(run, host_y, 1)); + + t0.join(); + t1.join(); +} + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/test/cond_take.cpp b/imperative/src/test/cond_take.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dad18671da4f044a1e783532fcad0d569a8785c4 --- /dev/null +++ b/imperative/src/test/cond_take.cpp @@ -0,0 +1,22 @@ +/** + * \file imperative/src/test/imperative.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. + * + */ + +#include "./helper.h" +#include "megbrain/imperative/ops/cond_take.h" + +using namespace mgb; +using namespace imperative; + +TEST(TestImperative, CondTake) { + auto op = imperative::CondTake::make(); + auto msk = HostTensorGenerator()({42}); + OprChecker(op).run({TensorShape{42}, *msk}); +} + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/test/helper.cpp b/imperative/src/test/helper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5fb3119b88dcf7bc1d29c5f80931d5ccce57e42b --- /dev/null +++ b/imperative/src/test/helper.cpp @@ -0,0 +1,164 @@ +/** + * \file imperative/src/test/helper.cpp + * + * This file is part of MegBrain, a deep learning framework developed by Megvii. + * + * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved. 
+ * + */ + +#include "helper.h" +#include "megbrain/graph.h" +#include "megbrain/opr/io.h" + +#include +#include +#include + +namespace py = pybind11; + +namespace mgb { +namespace imperative { + +namespace { + +#define XSTR(s) STR(s) +#define STR(s) #s +#define CONCAT(a, b) a##b +#define PYINIT(name) CONCAT(PyInit_, name) +#define pyinit PYINIT(MODULE_NAME) + +#define UNUSED __attribute__((unused)) + +extern "C" PyObject* pyinit(); + +class PyEnv { + static std::unique_ptr m_instance; + std::unique_ptr m_interpreter; + PyEnv(); +public: + static PyEnv& instance(); + static py::module get(); +}; + +std::unique_ptr PyEnv::m_instance = nullptr; + +PyEnv::PyEnv() { + mgb_assert(!m_instance); + auto err = PyImport_AppendInittab(XSTR(MODULE_NAME), &pyinit); + mgb_assert(!err); + m_interpreter.reset(new py::scoped_interpreter()); +} + +PyEnv& PyEnv::instance() { + if (!m_instance) { + m_instance.reset(new PyEnv()); + } + return *m_instance; +} + +py::module PyEnv::get() { + instance(); + return py::module::import(XSTR(MODULE_NAME)); +} + +py::array array(const Tensor& x) { + PyEnv::get(); + return py::cast(x).attr("numpy")(); +} + +py::array array(const HostTensorND& x) { + return array(*Tensor::make(x)); +} + +py::array array(const DeviceTensorND& x) { + return array(*Tensor::make(x)); +} + +UNUSED void print(const Tensor& x) { + return print(array(x)); +} + +UNUSED void print(const HostTensorND& x) { + return print(array(x)); +} + +UNUSED void print(const DeviceTensorND& x) { + return print(array(x)); +} + +UNUSED void print(const char* s) { + PyEnv::instance(); + py::print(s); +} + +} // anonymous namespace + +OprChecker::OprChecker(std::shared_ptr opdef) + : m_op(opdef) {} + +void OprChecker::run(std::vector inp_keys) { + HostTensorGenerator<> gen; + size_t nr_inps = inp_keys.size(); + SmallVector host_inp(nr_inps); + VarNodeArray sym_inp(nr_inps); + auto graph = ComputingGraph::make(); + graph->options().graph_opt_level = 0; + for (size_t i = 0; i < nr_inps; ++ i) { + host_inp[i] = std::visit([&gen](auto&& arg) -> HostTensorND { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return *gen(arg); + } else { + static_assert(std::is_same_v); + return arg; + } + }, inp_keys[i]); + sym_inp[i] = opr::SharedDeviceTensor::make(*graph, host_inp[i]).node(); + } + auto sym_oup = OpDef::apply_on_var_node(*m_op, sym_inp)->usable_output(); + size_t nr_oups = sym_oup.size(); + ComputingGraph::OutputSpec oup_spec(nr_oups); + SmallVector host_sym_oup(nr_oups); + for (size_t i = 0; i < nr_oups; ++ i) { + oup_spec[i] = make_callback_copy(sym_oup[i], host_sym_oup[i]); + } + auto func = graph->compile(oup_spec); + + SmallVector imp_physical_inp(nr_inps); + for (size_t i = 0; i < nr_inps; ++ i) { + imp_physical_inp[i] = Tensor::make(host_inp[i]); + } + + auto imp_oup = OpDef::apply_on_physical_tensor(*m_op, imp_physical_inp); + mgb_assert(imp_oup.size() == nr_oups); + + // check input not modified + for (size_t i = 0; i < imp_physical_inp.size(); ++i) { + HostTensorND hv; + hv.copy_from(imp_physical_inp[i]->dev_tensor()).sync(); + MGB_ASSERT_TENSOR_EQ(hv, host_inp[i]); + } + + SmallVector host_imp_oup(nr_oups); + for (size_t i = 0; i < nr_oups; ++ i) { + host_imp_oup[i].copy_from(imp_oup[i]->dev_tensor()).sync(); + } + + func->execute().wait(); // run last because it may contain inplace operations + + for(size_t i = 0; i < nr_oups; ++ i) { + MGB_ASSERT_TENSOR_EQ(host_sym_oup[i], host_imp_oup[i]); + } +} + +TEST(TestHelper, PyModule) { + py::module m = PyEnv::get(); + py::print(m); + 
diff --git a/imperative/src/test/helper.h b/imperative/src/test/helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..ad172f21086d27aec88366652dedf56f25e95d30
--- /dev/null
+++ b/imperative/src/test/helper.h
@@ -0,0 +1,32 @@
+/**
+ * \file imperative/src/test/helper.h
+ *
+ * This file is part of MegBrain, a deep learning framework developed by Megvii.
+ *
+ * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ *
+ */
+
+#pragma once
+
+#include <variant>
+
+#include "megbrain/imperative.h"
+#include "megbrain/test/helper.h"
+
+namespace mgb {
+namespace imperative {
+
+class OprChecker {
+public:
+    using InputSpec = std::variant<TensorShape, HostTensorND>;
+    OprChecker(std::shared_ptr<OpDef> opdef);
+    void run(std::vector<InputSpec> inp_keys);
+private:
+    std::shared_ptr<OpDef> m_op;
+};
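+
+// Usage sketch (see imperative.cpp for real cases): construct the checker
+// from an OpDef and pass one InputSpec per input; a TensorShape is filled
+// with random float data while a HostTensorND is used verbatim, e.g.
+//
+//     auto op = OprAttr::make("Elemwise");  // set attr.param before running
+//     OprChecker(op).run({TensorShape{42}, TensorShape{42}});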
+
+} // namespace imperative
+} // namespace mgb
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/imperative/src/test/imperative.cpp b/imperative/src/test/imperative.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..84072ae9bd5dc2515820947e2eb736bd563ddf8e
--- /dev/null
+++ b/imperative/src/test/imperative.cpp
@@ -0,0 +1,181 @@
+/**
+ * \file imperative/src/test/imperative.cpp
+ *
+ * This file is part of MegBrain, a deep learning framework developed by Megvii.
+ *
+ * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ *
+ */
+
+#include "./helper.h"
+#include "megbrain/opr/basic_arith.h"
+#include "megbrain/opr/basic_arith_wrapper.h"
+#include "megbrain/opr/dnn/convolution.h"
+#include "megbrain/opr/tensor_manip.h"
+#include "megbrain/opr/dnn/batch_norm.h"
+#include "megbrain/opr/utility.h"
+#include "megbrain/imperative/blob_manager.h"
+#include "megbrain/imperative/ops/opr_attr.h"
+#include "megbrain/comp_node_env.h"
+
+using namespace mgb;
+using namespace cg;
+using namespace imperative;
+
+TEST(TestImperative, APlusB) {
+    auto op = OprAttr::make("Elemwise");
+    auto&& attr = op->cast_final_safe<OprAttr>();
+    using Param = opr::Elemwise::Param;
+    Param param{Param::Mode::ADD};
+    attr.param.write_pod(param);
+    OprChecker(op).run({TensorShape{42}, TensorShape{42}});
+}
+
+TEST(TestImperative, Convolution) {
+    auto op = OprAttr::make("ConvolutionV1");
+    auto&& attr = op->cast_final_safe<OprAttr>();
+    using Param = opr::Convolution::Param;
+    using Policy = opr::Convolution::ExecutionPolicy;
+    Param param{Param::Mode::CONVOLUTION};
+    Policy policy{Policy::Strategy::HEURISTIC};
+    attr.param.write_pod(param);
+    attr.param.write_pod(policy);
+    size_t N = 4, IC = 3, OC = 8, FH = 3, FW = 3, IH = 16, IW = 16;
+    OprChecker(op).run({TensorShape{N, IC, IH, IW}, TensorShape{OC, IC, FH, FW}});
+}
+
+TEST(TestImperative, Reduce) {
+    auto op = OprAttr::make("ReduceV2");
+    auto&& attr = op->cast_final_safe<OprAttr>();
+    using Param = opr::Reduce::Param;
+    Param param{Param::Mode::SUM_SQR};
+    attr.param.write_pod(param);
+    HostTensorND one{CompNode::load("xpu0"), {{1}, dtype::Int32()}};
+    one.ptr<int>()[0] = 1;
+    OprChecker(op).run({TensorShape{2, 3, 4}, one});
+}
+
+TEST(TestImperative, BatchNorm) {
+    auto op = OprAttr::make("BatchNorm");
+    auto&& attr = op->cast_final_safe<OprAttr>();
+    using Param = opr::BatchNorm::Param;
+    Param param;
+    param.param_dim = Param::ParamDim::DIM_1C11;
+    param.avg_factor = 0.999;
+    attr.param.write_pod(param);
+    size_t N = 2, C = 3, H = 5, W = 5;
+    OprChecker(op).run({
+        TensorShape{N, C, H, W},
+        TensorShape{1, C, 1, 1},
+        TensorShape{1, C, 1, 1},
+        TensorShape{1, C, 1, 1},
+        TensorShape{1, C, 1, 1}
+    });
+}
+
+TEST(TestImperative, Concat) {
+    OprAttr::Param param;
+    param.write_pod(megdnn::param::Axis(0));
+    OperatorNodeConfig config{CompNode::load("xpu1")};
+    OprChecker(OprAttr::make("Concat", param, config))
+            .run({TensorShape{200, 300}, TensorShape{300, 300}});
+}
+
+TEST(TestImperative, Split) {
+    OprAttr::Param param;
+    param.write_pod(megdnn::param::Axis(0));
+    auto op = OprAttr::make("Split", param, OperatorNodeConfig{});
+    auto cn = CompNode::load("xpu0");
+    HostTensorND s1{cn, {{1}, dtype::Int32()}};
+    s1.ptr<int>()[0] = 20;
+    HostTensorND s2{cn, {{1}, dtype::Int32()}};
+    s2.ptr<int>()[0] = 80;
+    OprChecker(op).run({TensorShape{100}, s1, s2});
+}
+
+#if MGB_CUDA && MGB_ENABLE_EXCEPTION
+void run_graph(size_t mem_reserved, bool enable_defrag) {
+    CompNode::try_coalesce_all_free_memory();
+    CompNode::finalize();
+
+    auto cn = CompNode::load("gpux");
+    cn.sync(); // wait for async init to finish
+
+    BlobManager::inst()->set_enable(enable_defrag);
+
+    HostTensorGenerator<> gen;
+    using TensorPtr = std::shared_ptr<Tensor>;
+    TensorPtr ptr_a[100];
+
+    size_t unit_size = mem_reserved / (100.5 * 4);
+    auto host_a = gen({unit_size});
+    for (int i = 0; i < 100; ++i) {
+        ptr_a[i] = Tensor::make(*host_a);
+    }
+
+    // free half
+    for (int i = 0; i < 100; i += 2) {
+        ptr_a[i].reset();
+    }
+
+    auto op = OprAttr::make("Elemwise");
+    auto&& attr = op->cast_final_safe<OprAttr>();
+    using Param = opr::Elemwise::Param;
+    Param param{Param::Mode::MUL};
+    attr.param.write_pod(param);
+
+    auto out = OpDef::apply_on_physical_tensor(*op, {ptr_a[1], ptr_a[99]}).at(0);
+
+    // value before defrag
+    HostTensorND host_out_before;
+    host_out_before.copy_from(out->dev_tensor()).sync();
+
+    // make defrag work
+    auto e = Tensor::make(*gen({unit_size * 10}));
+
+    // value after defrag
+    HostTensorND host_out_after;
+    host_out_after.copy_from(out->dev_tensor()).sync();
+
+    // make sure defragmentation does not change the value
+    for (size_t i = 0; i < unit_size; ++i) {
+        ASSERT_EQ(host_out_before.ptr<float>()[i], host_out_after.ptr<float>()[i]);
+    }
+}
+
+TEST(TestImperative, Defragment) {
+    REQUIRE_GPU(1);
+    CompNode::load("gpux").activate();
+    size_t reserve;
+    {
+        size_t free, tot;
+        MGB_CUDA_CHECK(cudaMemGetInfo(&free, &tot));
+        reserve = free * 0.92;
+    }
+    auto reserve_setting = ssprintf("b:%zu", reserve);
+
+    auto do_run = [reserve]() {
+        ASSERT_THROW(run_graph(reserve, false), MemAllocError);
+        run_graph(reserve, true);
+    };
+
+    // reserve memory explicitly to avoid uncontrollable factors
+    constexpr const char* KEY = "MGB_CUDA_RESERVE_MEMORY";
+    auto old_value = getenv(KEY);
+    setenv(KEY, reserve_setting.c_str(), 1);
+    MGB_TRY {
+        do_run();
+    } MGB_FINALLY(
+        if (old_value) {
+            setenv(KEY, old_value, 1);
+        } else {
+            unsetenv(KEY);
+        }
+        CompNode::try_coalesce_all_free_memory();
+        CompNode::finalize();
+    );
+}
+#endif // MGB_CUDA && MGB_ENABLE_EXCEPTION
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/imperative/src/test/io_remote.cpp b/imperative/src/test/io_remote.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a4d914139de29365e31f1ec3fc7501d9c1833eea
--- /dev/null
+++ b/imperative/src/test/io_remote.cpp
@@ -0,0 +1,66 @@
+/**
+ * \file imperative/src/test/io_remote.cpp
+ *
+ * This file is part of MegBrain, a deep learning framework developed by Megvii.
+ *
+ * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ *
+ */
+
+#include "./helper.h"
+#include "megbrain/imperative/ops/io_remote.h"
+#include "megbrain/opr/mm_handler.h"
+
+using namespace mgb;
+using namespace imperative;
+
+TEST(TestImperative, IORemote) {
+    REQUIRE_GPU(2);
+    const char* server_addr = "127.0.0.1";
+    uint32_t port = 4567;
+    mgb_assert(create_zmqrpc_server(server_addr, port) > 0);
+    HostTensorGenerator<> gen;
+    CompNode cn0 = CompNode::load("gpu0"), cn1 = CompNode::load("gpu1");
+
+    size_t vector_size = 233;
+    auto host_x = gen({vector_size}, cn0), host_y = gen({vector_size}, cn1);
+
+    auto expect = gen({vector_size});
+    for (size_t i = 0; i < vector_size; ++i) {
+        expect->ptr<float>()[i] = host_x->ptr<float>()[i];
+    }
+
+    auto run_send = [&](std::shared_ptr<HostTensorND> hnd) {
+        imperative::RemoteSend def{"io_remote_test", server_addr, port, 1};
+        auto inp = Tensor::make(*hnd);
+        auto oup = OpDef::apply_on_physical_tensor(def, {inp});
+    };
+
+    auto run_recv = [&](std::shared_ptr<HostTensorND> hnd) {
+        imperative::RemoteRecv def{"io_remote_test",
+                                   server_addr,
+                                   port,
+                                   0,
+                                   {
+                                           vector_size,
+                                   },
+                                   CompNode::load("gpu1"),
+                                   dtype::Float32()};
+        auto inp = Tensor::make(*hnd);
+        auto oup = OpDef::apply_on_physical_tensor(def, {inp});
+        HostTensorND host_v;
+        host_v.copy_from(oup[0]->dev_tensor()).sync();
+        MGB_ASSERT_TENSOR_NEAR(*expect, host_v, 1e-6);
+    };
+
+    std::thread t0(std::bind(run_send, host_x));
+    std::thread t1(std::bind(run_recv, host_y));
+
+    t0.join();
+    t1.join();
+}
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
+
+// run this test alone with:
+//     ./imperative_test --gtest_filter='TestImperative.IORemote'
diff --git a/imperative/src/test/opr_utility.cpp b/imperative/src/test/opr_utility.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3fcf22c1dcb1b5cb75d0e4a4f7ca5d4be2f7f0ad
--- /dev/null
+++ b/imperative/src/test/opr_utility.cpp
@@ -0,0 +1,138 @@
+/**
+ * \file imperative/src/test/opr_utility.cpp
+ *
+ * This file is part of MegBrain, a deep learning framework developed by Megvii.
+ *
+ * \copyright Copyright (c) 2014-2019 Megvii Inc. All rights reserved.
+ *
+ */
+
+#include "megbrain/imperative/opr_utility.h"
+#include "megbrain/opr/io.h"
+#include "megbrain/opr/basic_arith.h"
+#include "megbrain/opr/utility.h"
+#include "megbrain/test/helper.h"
+
+using namespace mgb;
+using namespace opr;
+
+TEST(TestOprUtility, InputCallback) {
+    HostTensorGenerator<> gen;
+    DeviceTensorND dv;
+    auto hv = gen({2, 3});
+    dv.copy_from(*hv).sync();
+    auto graph = ComputingGraph::make();
+    auto callback = [dv]() { return dv; };
+    auto outputs = opr::InputCallback::make(*graph, callback, dv.comp_node(), dv.dtype());
+
+    HostTensorND hout;
+    ComputingGraph::OutputSpec outspec{make_callback_copy(outputs[0], hout)};
+    auto func = graph->compile(outspec);
+    func->execute();
+    MGB_ASSERT_TENSOR_EQ(hout, *hv);
+}
+
+TEST(TestOprUtility, OutputCallback) {
+    HostTensorGenerator<> gen;
+    auto hx = gen({2, 3});
+    auto graph = ComputingGraph::make();
+    auto x = opr::Host2DeviceCopy::make(*graph, hx);
+    HostTensorND hy;
+    auto callback = [&hy](DeviceTensorND dv) { hy.copy_from(dv); };
+    auto dummy = opr::OutputCallback::make({callback}, x);
+    auto y = opr::VirtualDep::make({x, dummy});
+
+    ComputingGraph::OutputSpec outspec{{y, [](DeviceTensorND&) {}}};
+    auto func = graph->compile(outspec);
+    func->execute();
+    MGB_ASSERT_TENSOR_EQ(hy, *hx);
+}
+
+TEST(TestOprUtility, NopCallback) {
+    HostTensorGenerator<> gen;
+    auto hx = gen({2, 3});
+    auto graph = ComputingGraph::make();
+    auto x = opr::Host2DeviceCopy::make(*graph, hx);
+    bool fired = false;
+    auto callback = [&fired]() { fired = true; };
+    auto dummy = opr::NopCallback::make(*graph, callback, x.node()->comp_node(), {x});
+    auto y = opr::VirtualDep::make({x, dummy});
+
+    ComputingGraph::OutputSpec outspec{{y, [](DeviceTensorND&) {}}};
+    auto func = graph->compile(outspec);
+    func->execute();
+    ASSERT_TRUE(fired);
+}
+
+TEST(TestOprUtility, NopCallbackMixedInput) {
+    auto graph = ComputingGraph::make();
+    auto x0 = opr::Host2DeviceCopy::make(*graph, HostTensorGenerator<>()({2, 3}),
+                                         OperatorNodeConfig(CompNode::load("xpu0")));
+    auto x1 = opr::Host2DeviceCopy::make(*graph, HostTensorGenerator<>()({2, 3}),
+                                         OperatorNodeConfig(CompNode::load("xpu1")));
+
+    bool fired = false;
+    auto callback = [&fired]() { fired = true; };
+    auto dummy = opr::NopCallback::make(*graph, callback, CompNode::load("xpux"), {x0, x1});
+    auto y = opr::VirtualDep::make({x0, dummy});
+
+    ComputingGraph::OutputSpec outspec{{y, [](DeviceTensorND&) {}}};
+    auto func = graph->compile(outspec);
+    func->execute();
+    ASSERT_TRUE(fired);
+}
+
+TEST(TestOprUtility, CallbackChain) {
+    auto graph = ComputingGraph::make();
+    graph->options().graph_opt_level = 0;
+    HostTensorGenerator<> gen;
+    SymbolVar x, dummy;
+    DeviceTensorND dev_x, dev_y;
+    auto host_x = gen({2, 3});
+    dev_x.copy_from(*host_x).sync();
+    auto cn = dev_x.comp_node();
+    auto dev_x_weakptr = std::weak_ptr(dev_x.storage().raw_storage());
+
+    {
+        auto callback = [&dev_x]() {
+            DeviceTensorND ret = dev_x;
+            dev_x.storage({});
+            return ret;
+        };
+        auto out = opr::InputCallback::make(*graph, callback, cn, dev_x.dtype());
+        x = out[0];
+        dummy = out[1];
+    }
+
+    {
+        x = opr::TypeCvt::make(x, dtype::Int32());
+        x = opr::TypeCvt::make(x, dtype::Int16());
+        auto callback = [&](DeviceTensorND y) {
+            // dev_x.storage has been reset in InputCallback
+            mgb_assert(!dev_x.storage().comp_node_valid());
+            dev_y = y;
+        };
+        dummy = opr::OutputCallback::make({callback}, {x, dummy});
+    }
+
+    bool fired = false;
+    {
+        auto callback = [&]() {
+            fired = true;
+            ASSERT_FALSE(dev_x_weakptr.lock());
+        };
+        dummy = opr::NopCallback::make(*graph, callback, cn, {dummy});
+    }
+
+    {
+        auto out = opr::VirtualDep::make({x.make_scalar(0), dummy});
+        ComputingGraph::OutputSpec outspec{{out, [](DeviceTensorND&) {}}};
+        auto func = graph->compile(outspec);
+        func->execute();
+    }
+
+    ASSERT_TRUE(fired);
+    HostTensorND host_y;
+    host_y.copy_from(dev_y).sync();
+    MGB_ASSERT_TENSOR_EQ(host_y, *host_x);
+}
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/imperative/src/version.ld b/imperative/src/version.ld
new file mode 100644
index 0000000000000000000000000000000000000000..19c7cde52ae01019004a1e076c5ee4b387da7892
--- /dev/null
+++ b/imperative/src/version.ld
@@ -0,0 +1,17 @@
+{
+global:
+    MGB_VSYM_*;
+    MEGDNN_VSYM_*;
+    mgb_get_extern_c_opr_api_versioned;
+    PyInit__imperative_rt;
+    extern "C++" {
+        *mgb::*;
+        *megdnn::*;
+        *megcore::*;
+        megcore*;
+    };
+    megcore*;
+
+local:
+    *;
+};
diff --git a/imperative/test/CMakeLists.txt b/imperative/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6b766cddce55b0893d49907bfa010d159d662fb7
--- /dev/null
+++ b/imperative/test/CMakeLists.txt
@@ -0,0 +1,45 @@
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
+set(MGB_TEST_DIR ${PROJECT_SOURCE_DIR}/test/src)
+
+file(GLOB_RECURSE SOURCES ../src/test/*.cpp ../src/impl/*.cpp ${MGB_TEST_DIR}/*.cpp)
+
+# disable distributed tests
+if(NOT MGE_WITH_DISTRIBUTED)
+    list(FILTER SOURCES EXCLUDE REGEX ".*test/collective_comm.cpp")
+    list(FILTER SOURCES EXCLUDE REGEX ".*test/io_remote.cpp")
+endif()
+
+# TODO: turn python binding into a static/object library
+add_executable(imperative_test ${SOURCES} ${SRCS})
+target_include_directories(imperative_test PRIVATE ${MGB_TEST_DIR}/include ../src/include)
+
+# Python binding
+target_include_directories(imperative_test PRIVATE ${MODULE_SRC_INCLUDE} ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR})
+target_compile_definitions(imperative_test PRIVATE MODULE_NAME=C)
+target_compile_options(imperative_test PRIVATE -Wno-unused-parameter)
+
+set(LINK_LIBS megbrain megdnn gtest pybind11::embed gen_op_def)
+if(MGE_WITH_CUDA)
+    list(APPEND LINK_LIBS cudart)
+endif()
+
+if(MGE_WITH_DISTRIBUTED)
+    list(APPEND LINK_LIBS megray)
+endif()
+
+target_link_libraries(imperative_test ${LINK_LIBS})
+if(CXX_SUPPORT_WCLASS_MEMACCESS)
+    if(MGE_WITH_CUDA)
+        target_compile_options(imperative_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>"
+                               "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>")
+    else()
+        target_compile_options(imperative_test PRIVATE "-Wno-class-memaccess")
+    endif()
+endif()
+
+if(UNIX)
+    target_link_libraries(imperative_test dl rt)
+endif()
+
+install(TARGETS imperative_test RUNTIME DESTINATION test)
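+
+# After a build, the test binary can be run straight from the build tree, e.g.
+# (the path follows the add_executable() above; adjust for your build dir):
+#   imperative/test/imperative_test --gtest_filter='TestImperative.*'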
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9b3eeba42b0fa9907c34984fe96aad7540eaa58b..895919a49aad7233311c58720ffbefe8e0b0e205 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -168,25 +168,28 @@ if(ANDROID)
     target_link_libraries(megbrain PUBLIC log)
 endif()
 
-if(NOT MGE_BUILD_IMPERATIVE_RT)
-    # Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF
-    add_library(megengine)
-    target_link_libraries(megengine PUBLIC megbrain megdnn)
-    if (UNIX AND NOT APPLE)
-        # TODO: Use target_link_options after upgrading to CMake 3.13
-        # FIXME; Please use right directory for mgb or imperative
-        target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${PROJECT_SOURCE_DIR}/python_module/src/version.ld)
-    endif()
-    set_target_properties(megengine PROPERTIES CXX_VISIBILITY_PRESET default)
-    set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
-    # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready
-    # for this.
-    install(TARGETS megengine
-        EXPORT ${MGE_EXPORT_TARGETS}
-        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+# pick the version script matching the python binding being built
+if(MGE_BUILD_IMPERATIVE_RT)
+    set(_VER_FILE ${PROJECT_SOURCE_DIR}/imperative/src/version.ld)
+else()
+    set(_VER_FILE ${PROJECT_SOURCE_DIR}/python_module/src/version.ld)
 endif()
+
+# Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF
+add_library(megengine)
+target_link_libraries(megengine PUBLIC megbrain megdnn)
+if (UNIX AND NOT APPLE)
+    # TODO: Use target_link_options after upgrading to CMake 3.13
+    target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE})
+endif()
+set_target_properties(megengine PROPERTIES CXX_VISIBILITY_PRESET default)
+set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
+# Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready
+# for this.
+install(TARGETS megengine
+    EXPORT ${MGE_EXPORT_TARGETS}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
 
 if (NOT MGE_WITH_DISTRIBUTED)
     install(TARGETS megbrain
         EXPORT ${MGE_EXPORT_TARGETS}
diff --git a/src/opr/impl/standalone/nms_cpu.cpp b/src/opr/impl/standalone/nms_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..21dc9c9eb4528b1b9845461fba0a7d0b59935aeb
--- /dev/null
+++ b/src/opr/impl/standalone/nms_cpu.cpp
@@ -0,0 +1,60 @@
+#include "./nms_cpu.h"
+
+#include <algorithm>
+#include <cstring>
+
+namespace {
+struct Box {
+    float x0, y0, x1, y1;
+};
+
+//! return whether IoU(a, b) > thresh
+bool box_iou(Box a, Box b, float thresh) {
+    using std::max;
+    using std::min;
+    float left = max(a.x0, b.x0), right = min(a.x1, b.x1);
+    float top = max(a.y0, b.y0), bottom = min(a.y1, b.y1);
+    float width = max(right - left, 0.f),
+          height = max(bottom - top, 0.f);
+    float interS = width * height;
+    float Sa = (a.x1 - a.x0) * (a.y1 - a.y0);
+    float Sb = (b.x1 - b.x0) * (b.y1 - b.y0);
+    return interS > (Sa + Sb - interS) * thresh;
+}
+} // anonymous namespace
+
+size_t mgb::opr::standalone::nms::cpu_kern_workspace(size_t nr_boxes) {
+    return (((nr_boxes - 1) / sizeof(size_t)) + 1) * sizeof(size_t);
+}
+
+void mgb::opr::standalone::nms::cpu_kern(size_t nr_boxes, size_t max_output,
+                                         float overlap_thresh,
+                                         const float* boxes, uint32_t* out_idx,
+                                         uint32_t* out_size, void* workspace) {
+    size_t out_pos = 0, last_out = 0;
+    auto boxes_bptr = reinterpret_cast<const Box*>(boxes);
+    // one kept-flag per bit; each size_t word stores sizeof(size_t) flags
+    auto kept_mask = static_cast<size_t*>(workspace);
+    memset(kept_mask, 0, cpu_kern_workspace(nr_boxes));
+    for (size_t i = 0; i < nr_boxes; ++i) {
+        bool suppressed = false;
+        auto ibox = boxes_bptr[i];
+        for (size_t j = 0; j < i; ++j) {
+            bool j_kept =
+                (kept_mask[j / sizeof(size_t)] >> (j % sizeof(size_t))) & 1;
+            if (j_kept && box_iou(ibox, boxes_bptr[j], overlap_thresh)) {
+                suppressed = true;
+                break;
+            }
+        }
+        if (!suppressed) {
+            kept_mask[i / sizeof(size_t)] |= size_t(1) << (i % sizeof(size_t));
+            last_out = i;
+            out_idx[out_pos++] = i;
+            if (out_pos == max_output)
+                break;
+        }
+    }
+    *out_size = out_pos;
+    while (out_pos < max_output) {
+        out_idx[out_pos++] = last_out;
+    }
+}
diff --git a/src/opr/impl/standalone/nms_cpu.h b/src/opr/impl/standalone/nms_cpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..918a2330b1f7186713a2671ec9c2c54ce30c0f59
--- /dev/null
+++ b/src/opr/impl/standalone/nms_cpu.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace mgb {
+namespace opr {
+namespace standalone {
+namespace nms {
+
+/*!
+ * \brief CPU single-batch nms kernel
+ *
+ * See nms_kern.cuh for explanation on the parameters.
+ */
+void cpu_kern(size_t nr_boxes, size_t max_output, float overlap_thresh,
+              const float* boxes, uint32_t* out_idx, uint32_t* out_size,
+              void* workspace);
+
+size_t cpu_kern_workspace(size_t nr_boxes);
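+
+// Usage sketch (buffer names illustrative; see NMSKeep::CPUKern in
+// nms_opr.cpp for the real call site): the caller provides all buffers:
+//
+//     std::vector<uint8_t> ws(cpu_kern_workspace(nr_boxes));
+//     std::vector<uint32_t> idx(max_output);
+//     uint32_t size;
+//     cpu_kern(nr_boxes, max_output, 0.5f, boxes, idx.data(), &size, ws.data());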
+
+} // namespace nms
+} // namespace standalone
+} // namespace opr
+} // namespace mgb
diff --git a/src/opr/impl/standalone/nms_kern.cu b/src/opr/impl/standalone/nms_kern.cu
new file mode 100644
index 0000000000000000000000000000000000000000..1c5e70fea2cee8bc288baace5a0fa4a143a26b12
--- /dev/null
+++ b/src/opr/impl/standalone/nms_kern.cu
@@ -0,0 +1,216 @@
+#include "nms_kern.cuh"
+
+#include <algorithm>
+#include <cstdint>
+
+namespace {
+
+// each thread computes one bit
+const int THREADS_PER_BLOCK = 64;
+
+const int WARP_SIZE = 32;
+
+// use aligned structure for large memory transaction
+struct __align__(16) Box {
+    float x0, y0, x1, y1;
+};
+
+//! return whether IoU(a, b) > thresh
+__device__ __forceinline__ bool box_iou(Box a, Box b, float thresh) {
+    float left = max(a.x0, b.x0), right = min(a.x1, b.x1);
+    float top = max(a.y0, b.y0), bottom = min(a.y1, b.y1);
+    float width = max(right - left, 0.f),
+          height = max(bottom - top, 0.f);
+    float interS = width * height;
+    float Sa = (a.x1 - a.x0) * (a.y1 - a.y0);
+    float Sb = (b.x1 - b.x0) * (b.y1 - b.y0);
+    return interS > (Sa + Sb - interS) * thresh;
+}
+
+//! store uint64_t with cache streaming
+__device__ __forceinline__ void store_u64_cs(uint64_t* ptr, uint64_t val) {
+    asm volatile("st.cs.u64 [%0], %1;" : : "l"(ptr), "l"(val));
+}
+
+//! load uint64_t with cache streaming
+__device__ __forceinline__ uint64_t load_u64_cs(const uint64_t* ptr) {
+    uint64_t val;
+    asm volatile("ld.cs.u64 %0, [%1];" : "=l"(val) : "l"(ptr));
+    return val;
+}
+
+__global__ void kern_gen_mask(
+        const int nr_boxes, const float nms_overlap_thresh,
+        const Box* dev_boxes, const int dev_mask_width, uint64_t* dev_mask) {
+    const int
+        box_group_row = blockIdx.y,
+        box_group_col = blockIdx.x;
+
+    if (box_group_row > box_group_col)
+        return;
+
+    const int
+        row_nr_boxes = min(
+                nr_boxes - box_group_row * THREADS_PER_BLOCK,
+                THREADS_PER_BLOCK),
+        col_nr_boxes = min(
+                nr_boxes - box_group_col * THREADS_PER_BLOCK,
+                THREADS_PER_BLOCK);
+
+    __shared__ Box block_boxes[THREADS_PER_BLOCK];
+
+    if (threadIdx.x < col_nr_boxes) {
+        block_boxes[threadIdx.x] = dev_boxes[
+            THREADS_PER_BLOCK * box_group_col + threadIdx.x];
+    }
+    __syncthreads();
+
+    if (threadIdx.x < row_nr_boxes) {
+        const int cur_box_idx = THREADS_PER_BLOCK * box_group_row + threadIdx.x;
+        Box cur_box = dev_boxes[cur_box_idx];
+
+        uint64_t result = 0;
+        const int start = (box_group_row == box_group_col) ?
+            threadIdx.x + 1 :  // blocks on the diagonal
+            0;
+        for (int i = start; i < col_nr_boxes; ++i) {
+            result |= static_cast<uint64_t>(
+                    box_iou(cur_box, block_boxes[i],
+                            nms_overlap_thresh)) << i;
+        }
+        store_u64_cs(
+                &dev_mask[cur_box_idx * dev_mask_width + box_group_col],
+                result);
+    }
+}
+
+//! true -> ~0, false -> 0
+__device__ __forceinline__ uint32_t bool_as_u32_mask(bool v) {
+    return (!v) - 1;
+}
+
+//! return min value of val in current warp, broadcast to all lanes
+__device__ __forceinline__ uint32_t warp_reduce_min_brdcst(uint32_t val) {
+    __shared__ uint32_t ans;
+    static_assert(WARP_SIZE == 32, "warp size != 32");
+#pragma unroll
+    for (uint32_t offset = WARP_SIZE / 2; offset; offset /= 2)
+        val = min(val, __shfl_down_sync(0xFFFFFFFF, val, offset));
+
+    if (!threadIdx.x)
+        ans = val;
+    __syncthreads();
+    return ans;
+}
+
+struct BitwiseOrArgs {
+    uint64_t* dst;
+    const uint64_t* src;
+    uint32_t size;
+};
+
+__device__ __forceinline__ void bitwise_or_single_warp(BitwiseOrArgs args) {
+    uint64_t* __restrict__ dst = args.dst;
+    const uint64_t* __restrict__ src = args.src;
+    uint32_t size = args.size;
+    for (uint32_t i = threadIdx.x; i < size; i += WARP_SIZE) {
+        dst[i] |= load_u64_cs(&src[i]);
+    }
+}
+
+__global__ void kern_gen_indices(
+        uint32_t nr_boxes, uint32_t max_output, uint32_t overlap_mask_width,
+        const uint64_t* __restrict__ overlap_mask, uint64_t* __restrict__ rm_mask,
+        uint32_t* __restrict__ out_idx, uint32_t* __restrict__ out_size) {
+    __shared__ uint32_t out_pos;
+    __shared__ BitwiseOrArgs bitwise_or_args;
+
+    const uint32_t nr_box_blocks = DIVUP(nr_boxes, 64);
+
+    if (!threadIdx.x) {
+        uint32_t cnt = nr_box_blocks * 64 - nr_boxes;
+        // mark the padded boxes as having been removed
+        rm_mask[nr_box_blocks - 1] = ((1ull << cnt) - 1) << (64 - cnt);
+        out_pos = 0;
+    }
+    __syncthreads();
+
+    uint32_t
+        box_block_id = threadIdx.x,
+        th0_box_block_id = 0;
+
+    while (th0_box_block_id < nr_box_blocks) {
+        bool in_range = box_block_id < nr_box_blocks;
+        uint64_t cur_mask = ~rm_mask[box_block_id & bool_as_u32_mask(in_range)];
+        uint32_t min_box_block_id = warp_reduce_min_brdcst(
+                box_block_id | bool_as_u32_mask(!(in_range && cur_mask)));
+
+        if (min_box_block_id + 1) {
+            // min_box_block_id != UINT32_MAX, so at least one thread finds an
+            // un-removed box
+            if (min_box_block_id == box_block_id) {
+                // exactly one thread can take this path
+                uint32_t box_id_in_block = __ffsll(cur_mask) - 1,
+                         box_id = box_block_id * 64 + box_id_in_block;
+
+                // so this box would not be processed again
+                rm_mask[box_block_id] |= 1ull << box_id_in_block;
+
+                bitwise_or_args.dst = &rm_mask[box_block_id];
+                bitwise_or_args.src =
+                    &overlap_mask[box_id * overlap_mask_width + box_block_id];
+                bitwise_or_args.size = nr_box_blocks - box_block_id;
+                out_idx[out_pos++] = box_id;
+            }
+            __syncthreads();
+            if (out_pos == max_output)
+                break;
+            bitwise_or_single_warp(bitwise_or_args);
+
+            // skip the blocks before min_box_block_id
+            th0_box_block_id = min_box_block_id;
+            box_block_id = min_box_block_id + threadIdx.x;
+        } else {
+            th0_box_block_id += WARP_SIZE;
+            box_block_id += WARP_SIZE;
+        }
+    }
+
+    if (out_pos < max_output) {
+        // fill the values after out_pos
+        uint32_t val = out_idx[out_pos - 1];
+        for (uint32_t i = out_pos + threadIdx.x; i < max_output; i += WARP_SIZE) {
+            out_idx[i] = val;
+        }
+    }
+    if (!threadIdx.x) {
+        *out_size = out_pos;
+    }
+}
+
+} // anonymous namespace
+
+void mgb::opr::standalone::nms::launch_gen_mask(
+        const int nr_boxes, const float nms_overlap_thresh,
+        const float* dev_boxes, const int dev_mask_width,
+        uint64_t* dev_mask, cudaStream_t stream) {
+    dim3 blocks(DIVUP(nr_boxes, THREADS_PER_BLOCK),
+                DIVUP(nr_boxes, THREADS_PER_BLOCK));
+    dim3 threads(THREADS_PER_BLOCK);
+    kern_gen_mask<<<blocks, threads, 0, stream>>>(
+            nr_boxes, nms_overlap_thresh,
+            reinterpret_cast<const Box*>(dev_boxes), dev_mask_width, dev_mask);
+}
+
+void mgb::opr::standalone::nms::launch_gen_indices(
+        int nr_boxes, int max_output, int overlap_mask_width,
+        const uint64_t* overlap_mask, uint64_t* rm_mask,
+        uint32_t* out_idx, uint32_t* out_size,
+        cudaStream_t stream) {
+    kern_gen_indices<<<1, WARP_SIZE, 0, stream>>>(
+            nr_boxes, max_output, overlap_mask_width,
+            overlap_mask, rm_mask,
+            out_idx, out_size);
+}
+
+// vim: ft=cuda syntax=cuda.doxygen
diff --git a/src/opr/impl/standalone/nms_kern.cuh b/src/opr/impl/standalone/nms_kern.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..aad49b2e1665995eb0e9bbf486bd594f7752a1d4
--- /dev/null
+++ b/src/opr/impl/standalone/nms_kern.cuh
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <cuda_runtime.h>
+#include <cstddef>
+#include <cstdint>
+
+#define DIVUP(m, n) (((m)-1) / (n) + 1)
+
+namespace mgb {
+namespace opr {
+namespace standalone {
+namespace nms {
+
+/*!
+ * \brief launch the kernel to generate nms mask
+ * \param nr_boxes number of input boxes
+ * \param nms_overlap_thresh overlapping threshold for IoU
+ * \param[in] dev_boxes boxes in [n, 4] layout,
+ *      each row containing (x0, y0, x1, y1)
+ * \param dev_mask_width width in number of uint64_t elements of the dev_mask
+ *      matrix; must be at least DIVUP(n, 64)
+ * \param[out] dev_mask [n, dev_mask_width] matrix; dev_mask[i] is a
+ *      bitmask of length n indicating whether box i overlaps with each other
+ *      box. Only the upper triangle (row < col) is filled.
+ */
+void launch_gen_mask(const int nr_boxes, const float nms_overlap_thresh,
+                     const float* dev_boxes, const int dev_mask_width,
+                     uint64_t* dev_mask, cudaStream_t stream);
+
+/*!
+ * \brief launch the kernel to generate indices of kept boxes
+ * \param max_output max number of entries to be written to out_idx
+ * \param overlap_mask the mask generated by launch_gen_mask
+ * \param[in,out] rm_mask mask of removed boxes; must be initialized as 0
+ * \param[out] out_idx indices of boxes to be kept
+ * \param[out] out_size number of items written to out_idx; the remaining items
+ *      would be filled with the last valid item
+ */
+void launch_gen_indices(int nr_boxes, int max_output, int overlap_mask_width,
+                        const uint64_t* overlap_mask, uint64_t* rm_mask,
+                        uint32_t* out_idx, uint32_t* out_size,
+                        cudaStream_t stream);
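+
+// Typical launch sequence (condensed from NMSKeep::CUDAKern::exec in
+// nms_opr.cpp; mask_w = DIVUP(nr_boxes, 64), all buffers device-side):
+//
+//     cudaMemsetAsync(overlap_mask, 0, nr_boxes * mask_w * 8, stream);
+//     launch_gen_mask(nr_boxes, iou_thresh, boxes, mask_w, overlap_mask, stream);
+//     cudaMemsetAsync(rm_mask, 0, mask_w * 8, stream);
+//     launch_gen_indices(nr_boxes, max_output, mask_w, overlap_mask, rm_mask,
+//                        out_idx, out_size, stream);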
+
+} // namespace nms
+} // namespace standalone
+} // namespace opr
+} // namespace mgb
+
+// vim: ft=cuda syntax=cuda.doxygen
diff --git a/src/opr/impl/standalone/nms_opr.cpp b/src/opr/impl/standalone/nms_opr.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a88a9a8669dba1c21c3cafff965c00c6b13ec89
--- /dev/null
+++ b/src/opr/impl/standalone/nms_opr.cpp
@@ -0,0 +1,272 @@
+#include "megbrain/opr/standalone/nms_opr.h"
+
+#if MGB_CUDA
+#include "./nms_kern.cuh"
+#endif
+#include "./nms_cpu.h"
+
+#include "megbrain/comp_node_env.h"
+#include "megbrain/serialization/sereg.h"
+#include "megbrain/utils/arith_helper.h" // for get_aligned_power2
+
+#if MGB_ENABLE_FBS_SERIALIZATION
+#include "megbrain/serialization/internal/mgb_cpp_opr_generated.h"
+#include "megbrain/serialization/internal/schema_generated.h"
+#endif
+
+using namespace mgb::opr::standalone;
+
+MGB_DYN_TYPE_OBJ_FINAL_IMPL(NMSKeep);
+
+class NMSKeep::Kern {
+public:
+    virtual ~Kern() = default;
+
+    //! get workspace size in bytes
+    virtual size_t get_workspace_size(const NMSKeep* opr,
+                                      const TensorShape& boxes) = 0;
+    virtual void exec(const NMSKeep* opr, const DeviceTensorND& inp,
+                      const DeviceTensorND& out_idx,
+                      const DeviceTensorND& out_size,
+                      const DeviceTensorND& workspace) = 0;
+};
+
+// f{{{ cuda kernel begins
+#if MGB_CUDA
+class NMSKeep::CUDAKern final : public Kern {
+    size_t m_workspace_overlap_mask_bytes, m_workspace_overlap_mask_bytes_align,
+           m_workspace_rm_mask_bytes;
+
+    void init(const NMSKeep* opr, const TensorShape& boxes) {
+        auto align = opr->comp_node().get_mem_addr_alignment();
+        size_t nr_boxes = boxes[1];
+        m_workspace_overlap_mask_bytes =
+                nr_boxes * DIVUP(nr_boxes, 64) * sizeof(uint64_t);
+        m_workspace_overlap_mask_bytes_align =
+                get_aligned_power2(m_workspace_overlap_mask_bytes, align);
+        m_workspace_rm_mask_bytes = DIVUP(nr_boxes, 64) * sizeof(uint64_t);
+    }
+
+public:
+    size_t get_workspace_size(const NMSKeep* opr,
+                              const TensorShape& boxes) override {
+        init(opr, boxes);
+        return m_workspace_overlap_mask_bytes_align + m_workspace_rm_mask_bytes;
+    }
+
+    void exec(const NMSKeep* opr, const DeviceTensorND& inp,
+              const DeviceTensorND& out_idx, const DeviceTensorND& out_size,
+              const DeviceTensorND& workspace) override;
+};
+
+void NMSKeep::CUDAKern::exec(const NMSKeep* opr, const DeviceTensorND& inp,
+                             const DeviceTensorND& out_idx,
+                             const DeviceTensorND& out_size,
+                             const DeviceTensorND& workspace) {
+    // NOTE: input comp node might be different from output comp node (for
+    // example, CUDA stream may be modified to overlap computations); a
+    // SingleCNOperatorNodeBase is expected to execute on a single comp node,
+    // and the comp node is defined as the output comp node
+    CompNode comp_node = out_idx.comp_node();
+
+    // comp node is also accessible from SingleCNOperatorNode
+    mgb_assert(comp_node == opr->comp_node());
+
+    // CompNodeEnv contains platform-specific properties of a CompNode
+    auto&& cuda_env = CompNodeEnv::from_comp_node(comp_node).cuda_env();
+    mgb_assert(cuda_env.device_prop.warpSize == 32, "invalid warp size: %d",
+               cuda_env.device_prop.warpSize);
+    auto stream = cuda_env.stream;
+
+    init(opr, inp.shape());
+
+    auto inp_ptr = inp.ptr<float>();
+    auto dev_overlap_mask = reinterpret_cast<uint64_t*>(workspace.raw_ptr()),
+         dev_rm_mask = reinterpret_cast<uint64_t*>(
+                 workspace.raw_ptr() + m_workspace_overlap_mask_bytes_align);
+    auto out_idx_ptr = reinterpret_cast<uint32_t*>(out_idx.ptr<int>()),
+         out_size_ptr = reinterpret_cast<uint32_t*>(out_size.ptr<int>());
+    size_t batch = inp.shape(0), nr_boxes = inp.shape(1);
+
+    MGB_CUDA_CHECK(cudaMemsetAsync(dev_overlap_mask, 0,
+                                   m_workspace_overlap_mask_bytes, stream));
+
+    auto max_output = opr->param().max_output;
+
+    for (size_t i = 0; i < batch; ++i) {
+        nms::launch_gen_mask(nr_boxes, opr->param().iou_thresh,
+                             inp_ptr + i * nr_boxes * 4, DIVUP(nr_boxes, 64),
+                             dev_overlap_mask, stream);
+
+        MGB_CUDA_CHECK(cudaMemsetAsync(dev_rm_mask, 0,
+                                       m_workspace_rm_mask_bytes, stream));
+        nms::launch_gen_indices(nr_boxes, max_output, DIVUP(nr_boxes, 64),
+                                dev_overlap_mask, dev_rm_mask,
+                                out_idx_ptr + i * max_output, out_size_ptr + i,
+                                stream);
+    }
+}
+
+#endif // MGB_CUDA for CUDAKern
+// f}}} cuda kernel ends
+
+// f{{{ cpu kernel begins
+class NMSKeep::CPUKern final : public Kern {
+public:
+    ~CPUKern() = default;
+
+    size_t get_workspace_size(const NMSKeep*,
+                              const TensorShape& boxes) override {
+        return nms::cpu_kern_workspace(boxes.shape[1]);
+    }
+
+    void exec(const NMSKeep* opr, const DeviceTensorND& inp,
+              const DeviceTensorND& out_idx, const DeviceTensorND& out_size,
+              const DeviceTensorND& workspace) override;
+};
+
+void NMSKeep::CPUKern::exec(const NMSKeep* opr, const DeviceTensorND& inp,
+                            const DeviceTensorND& out_idx,
+                            const DeviceTensorND& out_size,
+                            const DeviceTensorND& workspace) {
+    // See CUDAKern::exec for more explanation on output comp nodes.
+    CompNode comp_node = out_idx.comp_node();
+
+    auto inp_ptr = inp.ptr<float>();
+    auto out_idx_ptr = reinterpret_cast<uint32_t*>(out_idx.ptr<int>()),
+         out_size_ptr = reinterpret_cast<uint32_t*>(out_size.ptr<int>());
+    size_t batch = inp.shape(0), nr_boxes = inp.shape(1);
+    auto param = opr->param();
+
+    auto workspace_ptr = workspace.raw_ptr();
+
+    // NOTE: we must copy all the params into the kernel closure since it would
+    // be dispatched on a different thread
+    auto kern = [=]() {
+        for (size_t i = 0; i < batch; ++i) {
+            nms::cpu_kern(nr_boxes, param.max_output, param.iou_thresh,
+                          inp_ptr + i * nr_boxes * 4,
+                          out_idx_ptr + i * param.max_output, out_size_ptr + i,
+                          workspace_ptr);
+        }
+    };
+
+    // the kernel must not be invoked synchronously here: CPU comp nodes run
+    // their ops on a dispatcher thread, so enqueue it there instead
+    CompNodeEnv::from_comp_node(comp_node).cpu_env().dispatch(kern);
+}
+
+// f}}} cpu kernel ends
+
+NMSKeep::NMSKeep(VarNode* boxes, const Param& param,
+                 const OperatorNodeConfig& config)
+        : Super(boxes->owner_graph(),  // owner graph
+                config,                // OperatorNodeConfig
+                "nms_keep",  // opr type name (used for generating opr name)
+                {boxes}      // input vars for generating opr name
+                ),
+          m_param{param} {
+    mgb_assert(boxes->dtype() == dtype::Float32(),
+               "input should be float32; got %s", boxes->dtype().name());
+    // setup m_kern according to device type
+    switch (boxes->comp_node().device_type()) {
+#if MGB_CUDA
+        case CompNode::DeviceType::CUDA:
+            m_kern = std::make_unique<CUDAKern>();
+            break;
+#endif
+        case CompNode::DeviceType::CPU:
+            m_kern = std::make_unique<CPUKern>();
+            break;
+        default:
+            mgb_throw(MegBrainError, "NMSKeep: unsupported device type: %s",
+                      boxes->comp_node().to_string().c_str());
+    }
+
+    add_input({boxes});
+    add_output("indices")->dtype(dtype::Int32());
+    add_output("sizes")->dtype(dtype::Int32());
+    cg::add_workspace_output(this); // workspace is also an output var
+
+    // make the graph deduplication system consider m_param (so two oprs with
+    // same input vars but different param values would not be deduplicated)
+    add_equivalence_component<PODHash<Param>>(&m_param);
+}
+
+// impl dtor after Kern is defined
+NMSKeep::~NMSKeep() noexcept = default;
+
+mgb::SymbolVar NMSKeep::make(SymbolVar boxes, const Param& param,
+                             const OperatorNodeConfig& config) {
+    // SymbolVar is just a wrapper of VarNode*, with overloaded methods such as
+    // operator+()
+    auto bvar = boxes.node();
+    // insert opr into the owner graph of boxes
+    return boxes.insert_single_output_opr<NMSKeep>(bvar, param, config);
+}
+
+void NMSKeep::get_output_var_shape(const TensorShapeArray& inp_shape,
+                                   TensorShapeArray& out_shape) const {
+    auto boxes = inp_shape.at(0);
+    mgb_assert(boxes.ndim == 3 && boxes.shape[2] == 4, "invalid box shape: %s",
+               boxes.to_string().c_str());
+
+    // out_shape should match the outputs added in the constructor
+    mgb_assert(out_shape.size() == 3);
+
+    auto batch = boxes[0];
+    out_shape[0] = {batch, m_param.max_output};                // indices
+    out_shape[1] = {batch};                                    // sizes
+    out_shape[2] = {m_kern->get_workspace_size(this, boxes)};  // workspace
+}
+
+void NMSKeep::add_input_layout_constraint() {
+    input(0)->add_layout_constraint_contiguous();
+}
+
+void NMSKeep::scn_do_execute() {
+    DeviceTensorND empty_workspace;
+    m_kern->exec(this, input(0)->dev_tensor(), output(0)->dev_tensor(),
+                 output(1)->dev_tensor(),
+                 // if workspace size is 0, output(2) would be invalid and its
+                 // dev_tensor() can not be accessed
+                 output(2)->dev_tensor_valid() ? output(2)->dev_tensor()
+                                               : empty_workspace);
+}
+
+#if MGB_ENABLE_FBS_SERIALIZATION
+
+namespace mgb {
+namespace serialization {
+namespace fbs {
+
+template <>
+struct ParamConverter<opr::standalone::NMSKeep::Param> {
+    using FlatBufferType = param::NMSKeep;
+    static opr::standalone::NMSKeep::Param to_param(const FlatBufferType* fb) {
+        return {fb->iou_thresh(), fb->max_output()};
+    }
+    static flatbuffers::Offset<FlatBufferType> to_flatbuffer(
+            flatbuffers::FlatBufferBuilder& builder,
+            const opr::standalone::NMSKeep::Param& p) {
+        return param::CreateNMSKeep(builder, p.iou_thresh, p.max_output);
+    }
+};
+
+} // namespace fbs
+} // namespace serialization
+} // namespace mgb
+
+#endif
+
+namespace mgb {
+
+void _hack_pull_in_nms_opr_object() {}
+
+} // namespace mgb
+
+// register serialization: the default implementation uses Opr::Param; it
+// requires Param::TAG, Opr::param() and Opr::make(..., param) to exist.
+// Note: the second parameter (1) here means that this operator has one input
+using NMSKeepMGB = NMSKeep;
+MGB_SEREG_OPR(NMSKeepMGB, 1);
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/opr/include/megbrain/opr/standalone/nms_opr.h b/src/opr/include/megbrain/opr/standalone/nms_opr.h
new file mode 100644
index 0000000000000000000000000000000000000000..a15e9f0ce812b0431eef6d1cecc9101132fd20e1
--- /dev/null
+++ b/src/opr/include/megbrain/opr/standalone/nms_opr.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "megbrain/graph.h"
+#include "megbrain_build_config.h"
+
+namespace mgb {
+namespace opr {
+namespace standalone {
+
+/*!
+ * \brief generate indices of boxes to be kept after NMS
+ *
+ * See the docs in the python operator
+ */
+MGB_DEFINE_OPR_CLASS(NMSKeep,
+                     cg::SingleCNOutshapePureByInshapeOprBase) // {
+public:
+    struct Param {
+        //! TAG is used by the serializer to check Param type; here we
+        //! just use a random number. To generate such a random number,
+        //! run `xxd -l4 -p /dev/urandom`
+        static constexpr uint32_t TAG = 0x988a7630u;
+
+        float iou_thresh;     //!< IoU threshold for overlapping
+        uint32_t max_output;  //!< max number of output boxes per batch
+    };
+
+    NMSKeep(VarNode* boxes, const Param& param,
+            const OperatorNodeConfig& config);
+    ~NMSKeep() noexcept;
+
+    //! factory method to insert the operator into a graph
+    static SymbolVar make(SymbolVar boxes, const Param& param,
+                          const OperatorNodeConfig& config = {});
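+
+    // usage sketch (adapted from src/opr/test/standalone/nms.cpp): keep at
+    // most 16 boxes at IoU threshold 0.2; the second output var of the
+    // inserted opr holds the number of valid indices:
+    //
+    //     auto idx = NMSKeep::make(boxes, {0.2f, 16});
+    //     auto size = idx.node()->owner_opr()->output(1);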
+
+    const Param& param() const { return m_param; }
+
+private:
+    const Param m_param;
+
+    class Kern;
+    class CUDAKern;
+    class CPUKern;
+
+    std::unique_ptr<Kern> m_kern;
+
+    //! override output shape infer func provided by
+    //! SingleCNOutshapePureByInshapeOprBase
+    void get_output_var_shape(const TensorShapeArray& inp_shape,
+                              TensorShapeArray& out_shape) const override;
+
+    //! this opr requires inputs to be contiguous
+    void add_input_layout_constraint() override;
+
+    //! execute the operator
+    void scn_do_execute() override;
+};
+
+} // namespace standalone
+} // namespace opr
+} // namespace mgb
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/opr/test/standalone/nms.cpp b/src/opr/test/standalone/nms.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f53b05936cd8e718440a57d931effc4e442d4c2a
--- /dev/null
+++ b/src/opr/test/standalone/nms.cpp
@@ -0,0 +1,77 @@
+/**
+ * \file src/opr/test/standalone/nms.cpp
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ */
+
+#include "megbrain/opr/standalone/nms_opr.h"
+#include "megbrain/test/helper.h"
+#include "megbrain/opr/io.h"
+#include "megbrain/opr/tensor_manip.h"
+#include "megbrain/opr/tensor_gen.h"
+#include <memory>
+
+using namespace mgb;
+
+namespace {
+
+void run_on_comp_node(const char* cn_name) {
+    auto cn = CompNode::load(cn_name);
+    auto graph = ComputingGraph::make();
+    auto host_x = std::make_shared<HostTensorND>(cn, TensorShape{1, 2, 4},
+                                                 dtype::Float32{});
+    auto ptr = host_x->ptr<float>();
+    ptr[0] = 0.;  ptr[1] = 0.;
+    ptr[2] = 2.;  ptr[3] = 2.;
+    ptr[4] = 0.5; ptr[5] = 0.5;
+    ptr[6] = 1.5; ptr[7] = 1.5;
+
+    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
+
+    {
+        auto idx = opr::standalone::NMSKeep::make(x, {0.2, 16});
+        auto size = idx.node()->owner_opr()->output(1);
+        HostTensorND host_idx, host_size;
+        auto func = graph->compile({make_callback_copy(idx, host_idx),
+                                    make_callback_copy(size, host_size)});
+        func->execute().wait();
+        auto idx_ptr = host_idx.ptr<int>();
+        auto size_ptr = host_size.ptr<int>();
+        ASSERT_EQ(size_ptr[0], 1);
+        ASSERT_EQ(idx_ptr[0], 0);
+    }
+    {
+        auto idx = opr::standalone::NMSKeep::make(x, {0.5, 16});
+        auto size = idx.node()->owner_opr()->output(1);
+        HostTensorND host_idx, host_size;
+        auto func = graph->compile({make_callback_copy(idx, host_idx),
+                                    make_callback_copy(size, host_size)});
+        func->execute().wait();
+        auto idx_ptr = host_idx.ptr<int>();
+        auto size_ptr = host_size.ptr<int>();
+        ASSERT_EQ(size_ptr[0], 2);
+        ASSERT_EQ(idx_ptr[0], 0);
+        ASSERT_EQ(idx_ptr[1], 1);
+    }
+}
+
+} // anonymous namespace
+
+TEST(TestOprNMS, CPU) {
+    run_on_comp_node("cpu0");
+}
+
+TEST(TestOprNMS, GPU) {
+    REQUIRE_GPU(1);
+    run_on_comp_node("gpu0");
+}
+
+#if MGB_ENABLE_EXCEPTION
+TEST(TestOprNMS, InvalidInput) {
+    HostTensorGenerator<> gen;
+    auto graph = ComputingGraph::make();
+    auto host_x = gen({1, 9, 5});
+    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
+    ASSERT_ANY_THROW(opr::standalone::NMSKeep::make(x, {1., 1}));
+}
+#endif // MGB_ENABLE_EXCEPTION