From 5257991e68e8746b8c3539b29948369ae30fa64b Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Fri, 19 Jun 2020 14:41:21 +0800 Subject: [PATCH] fix(jit): fix jit doc and add NCHW44_DOT GitOrigin-RevId: 5f5feae8e727dd111615022f2a21c7ede647156a --- python_module/megengine/jit/__init__.py | 42 +++++++++++++++++++------ 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/python_module/megengine/jit/__init__.py b/python_module/megengine/jit/__init__.py index a2ce8238c..5e2e495d3 100644 --- a/python_module/megengine/jit/__init__.py +++ b/python_module/megengine/jit/__init__.py @@ -442,17 +442,38 @@ class trace: Serialize trace to file system. :param fpath: positional only argument. Path of output file. - :param arg_names: names of the input tensors in the traced function - :param append: whether output is appended to ``fpath`` - :param f16_io_f32_comp: whether to use float16 for I/O between oprs and use + :param arg_names: names of the input tensors in the traced function. + :param append: whether output is appended to ``fpath``. + :param optimize_for_inference: whether to enable optimize_for_inference + pass before dump. + + :param enable_io16xc32: whether to use float16 for I/O between oprs and use float32 as internal computation precision. Note the output var would be - changed to float16 + changed to float16. - :param f16_io_comp: whether to use float16 for both I/O and computation - precision + :param enable_ioc16: whether to use float16 for both I/O and computation - :param use_nhwcd4: whether to use NHWCD4 data format. This is faster on some - OpenCL devices - :param fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearty - into one opr. This is supported only in NHWCD4 format. + precision. + + :param enable_hwcd4: whether to use NHWCD4 data layout. This is faster on some + OpenCL backends. + :param enable_nchw88: whether to use NCHW88 data layout. It is currently + used in the X86 AVX backend.
+ :param enable_nchw44: whether to use NCHW44 data layout. It is currently + used in the ARM backend. + :param enable_nchw44_dot: whether to use NCHW44_DOT data layout. It is currently + used in the ARMv8.2+dotprod backend. + :param enable_nchw4: whether to use NCHW4 data layout. It is currently + used in the NVIDIA backend (based on cuDNN). + :param enable_nchw32: whether to use NCHW32 data layout. It is currently + used in the NVIDIA backend with TensorCore (based on cuDNN). + :param enable_chwn4: whether to use CHWN4 data layout. It is currently + used in the NVIDIA backend with TensorCore. + + :param enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearity + into one opr. + :param enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z + input for inference on the NVIDIA backend (this optimization pass will + result in a mismatch of the precision of the output of training and + inference). """ if self._status != self._FINISHED: raise ValueError("not traced") @@ -475,6 +496,7 @@ class trace: "enable_nchw88": "use_nchw88", "enable_nchw32": "use_nchw32", "enable_nchw44": "use_nchw44", + "enable_nchw44_dot": "use_nchw44_dot", "enable_chwn4": "use_chwn4", "enable_fuse_conv_bias_nonlinearity": "fuse_conv_bias_nonlinearity", "enable_fuse_conv_bias_with_z": "fuse_conv_bias_with_z", -- GitLab