From 55042195d4454ac349dafee7787f998120982e2f Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Fri, 13 Nov 2020 12:39:30 +0800 Subject: [PATCH] chore(winograd): add Convolutionv2 param GitOrigin-RevId: 1a9e2ea340f6eb37b6a03db53038ccc053e77635 --- dnn/scripts/opr_param_defs.py | 205 ++++++++++++++++++++---- dnn/src/cuda/convolution/opr_impl.cpp | 1 - dnn/src/x86/conv_bias/opr_impl.cpp | 1 - imperative/src/test/imperative.cpp | 2 +- src/gopt/impl/inference.cpp | 1 - src/gopt/impl/tensor_reformat.cpp | 6 +- src/opr/impl/dnn/dnn.oprdecl | 16 +- src/opr/impl/dnn/dnn.sereg.h | 197 ++++++++++++++++++++--- src/opr/impl/imgproc.oprdecl | 10 +- src/opr/impl/imgproc.sereg.h | 89 ++++++++-- src/serialization/impl/opr_registry.cpp | 1 - 11 files changed, 448 insertions(+), 81 deletions(-) diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py index 6a899d01b..9f02b7518 100755 --- a/dnn/scripts/opr_param_defs.py +++ b/dnn/scripts/opr_param_defs.py @@ -53,7 +53,7 @@ pdef('Axis').add_fields('int32', 'axis', 0) 'of convolution using CUDA/SASS. The channels are splitted to groups of 4 channels.')) ) -(pdef('Convolution', version=1). +(pdef('Convolution', version=1, is_legacy=True). add_enum_alias('Mode', 'ConvolutionV0'). add_fields( 'uint32', @@ -78,6 +78,39 @@ pdef('Axis').add_fields('int32', 'axis', 0) name_field='compute_mode') ) +(pdef('Convolution', version=2). + add_enum_alias('Mode', 'ConvolutionV0'). + add_fields( + 'uint32', + Doc('pad_h', 'padding on one side on the first dimension'), 0, + Doc('pad_w', 'padding on one side on the second dimension'), 0, + Doc('stride_h', 'kernel stride on the first dimension'), 1, + Doc('stride_w', 'kernel stride on the second dimension'), 1, + Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1, + Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1 + ). + add_enum_alias('Sparse', 'ConvolutionV0'). + add_enum(Doc('Format', 'convolution data/filter/output format; see ' + ':class:`RelayoutFormat` for more details'), + 'NCHW', 'NHWC', 'NHWCD4', 'NCHW4', 'NCHW8', 'NCHW32', 'NCHW88', + 'NCHW44','NCHW44_DOT', + Doc('NCHW4_NCHW32', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'), + Doc('NCHW32_NCHW4', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'), + Doc('NCHW4_NCHW', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'), + Doc('NHWC_NCHW', 'NHWC_NCHW means input tensors are nhwc layout, ' + 'output tensor is nchw layout'), + Doc('NHWC_NCHW4_IC_SMALL', 'NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, ' + 'output tensor is nchw4 layout, padding c=4'), + Doc('NCHW_NCHW4_IC_SMALL', 'NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, ' + 'output tensor is nchw4 layout, padding c=4'), + Doc('CHWN4', 'CHWN4 is currently only used on Nvidia platform for fast implementation ' + 'of convolution using CUDA/SASS. The channels are splitted to groups of 4 channels.')). + add_enum_alias('ComputeMode', 'ConvolutionV1',name_field='compute_mode') + ) + + (pdef('MaskPropagate'). add_fields( 'uint32', @@ -137,10 +170,10 @@ pdef('Axis').add_fields('int32', 'axis', 0) 'on the second dimension'), 1, Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) ' 'on the second dimension'), 1). - add_enum_alias('ComputeMode', 'Convolution', name_field='compute_mode') + add_enum_alias('ComputeMode', 'ConvolutionV1', name_field='compute_mode') ) -(pdef('ConvBias', 'active(conv(x, w) + bias)', version=3). +(pdef('ConvBias', 'active(conv(x, w) + bias)', version=3, is_legacy=True). add_enum_alias('NonlineMode', 'ConvBiasV0'). add_enum_alias('Mode', 'ConvolutionV0'). add_enum_alias('Sparse', 'ConvolutionV0'). @@ -156,9 +189,26 @@ pdef('Axis').add_fields('int32', 'axis', 0) Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) ' 'on the second dimension'), 1, Doc('output_block_size', 'detail meaning \see winograd in conv bias'), 0). - add_enum_alias('ComputeMode', 'Convolution', name_field='compute_mode') + add_enum_alias('ComputeMode', 'ConvolutionV1', name_field='compute_mode') ) +(pdef('ConvBias', 'active(conv(x, w) + bias)', version=4). + add_enum_alias('NonlineMode', 'ConvBiasV0'). + add_enum_alias('Mode', 'ConvolutionV0'). + add_enum_alias('Sparse', 'ConvolutionV0'). + add_enum_alias('Format', 'Convolution'). + add_fields( + 'uint32', + Doc('pad_h', 'padding on one side on the first dimension'), 0, + Doc('pad_w', 'padding on one side on the second dimension'), 0, + Doc('stride_h', 'kernel stride on the first dimension'), 1, + Doc('stride_w', 'kernel stride on the second dimension'), 1, + Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1, + Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1). + add_enum_alias('ComputeMode', 'ConvolutionV1', name_field='compute_mode') + ) (pdef('SeparableConv'). add_enum_alias('Mode', 'ConvolutionV0'). add_enum('BorderMode', 'BORDER_REPLICATE', 'BORDER_REFLECT', @@ -172,7 +222,7 @@ pdef('Axis').add_fields('int32', 'axis', 0) add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1, 'window_h', 3, 'window_w', 3)) -(pdef('Pooling'). +(pdef('Pooling', version=0, is_legacy=True). add_enum( 'Mode', Doc('MAX', 'maximum value inside pooling window'), @@ -188,11 +238,23 @@ pdef('Axis').add_fields('int32', 'axis', 0) add_enum_alias('Format', 'ConvolutionV0') ) -(pdef('AdaptivePooling'). - add_enum_alias('Mode', 'Pooling'). +(pdef('Pooling', version=1). + add_enum_alias('Mode','PoolingV0'). + add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 2, 'stride_w', 2, + 'window_h', 2, 'window_w', 2). + add_enum_alias('Format', 'Convolution') + ) + +(pdef('AdaptivePooling', version=0,is_legacy=True). + add_enum_alias('Mode', 'PoolingV0'). add_enum_alias('Format', 'ConvolutionV0') ) +(pdef('AdaptivePooling', version=1). + add_enum_alias('Mode', 'PoolingV0'). + add_enum_alias('Format', 'Convolution') + ) + (pdef('LRN', 'see ImageNet Classification with Deep Convolutional Neural Networks for' ' meaning of the fields'). @@ -239,7 +301,7 @@ BORDER_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'), Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii'), Doc('TRANSPARENT', ''), Doc('ISOLATED', '')] -(pdef('WarpPerspective', version=1). +(pdef('WarpPerspective', version=1, is_legacy=True). add_enum('InterpolationMode', *INTERP_MODES, name_field='imode', default=1, member_alias=[(i, 'INTER_{}'.format(i)) for i in INTERP_MODES] @@ -251,6 +313,13 @@ BORDER_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'), add_enum_alias('Format', 'ConvolutionV0'). add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f')) +(pdef('WarpPerspective', version=2). + add_enum_alias('InterpolationMode','WarpPerspectiveV1',name_field="imode"). + add_enum_alias('BorderMode','WarpPerspectiveV1',name_field="bmode"). + add_enum_alias('Format', 'Convolution'). + add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f')) + + pdef('SpatialTfGridGenerator').add_enum('Mode', 'AFFINE') pdef('SpatialTfSampler').add_enum('Mode', 'BILINEAR') @@ -420,9 +489,12 @@ pdef('ElemwiseMultiType').add_enum( pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) -(pdef('DctChannelSelect', '2d discrete cosine transform').add_enum_alias('Format', 'ConvolutionV0'). +(pdef('DctChannelSelect', '2d discrete cosine transform', version=0, is_legacy=True).add_enum_alias('Format', 'ConvolutionV0'). add_enum('FastImpl', 'NONE', 'FIX_32_MASK').add_fields('int32', 'dct_block_size', 8)) +(pdef('DctChannelSelect', '2d discrete cosine transform', version=1).add_enum_alias('Format', 'Convolution'). + add_enum_alias('FastImpl', 'DctChannelSelectV0').add_fields('int32', 'dct_block_size', 8)) + (pdef('MatrixMul', version=0, is_legacy=True). add_fields('bool', 'transposeA', 'false', 'transposeB', 'false'). add_enum('DataType', @@ -695,34 +767,51 @@ pdef('UniformRNG').add_fields('uint64', 'seed', 0) name_field = 'mode')) (pdef('WarpAffine', version=0, is_legacy=True) - .add_enum_alias('InterpolationMode', 'WarpPerspective', name_field='imode') - .add_enum_alias('BorderMode', 'WarpPerspective', name_field='border_mode') + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode') + .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode') .add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f')) -(pdef('WarpAffine', version=1) - .add_enum_alias('InterpolationMode', 'WarpPerspective', name_field='imode') - .add_enum_alias('BorderMode', 'WarpPerspective', name_field='border_mode') +(pdef('WarpAffine', version=1, is_legacy=True) + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode') + .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode') .add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f') .add_enum_alias('Format', 'ConvolutionV0', default=1)) +(pdef('WarpAffine', version=2) + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode') + .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode') + .add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f') + .add_enum_alias('Format', 'Convolution', default=1)) + + (pdef('GaussianBlur') - .add_enum_alias('BorderMode', 'WarpPerspective', name_field='border_mode') + .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode') .add_fields('uint32', 'kernel_height', 0, 'kernel_width', 0) .add_fields('float32','sigma_x', '0.f', 'sigma_y', '0.f')) (pdef('Resize', version=0, is_legacy=True) - .add_enum_alias('InterpolationMode', 'WarpPerspective', name_field='imode')) + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')) -(pdef('Resize', version=1) - .add_enum_alias('InterpolationMode', 'WarpPerspective', name_field='imode') +(pdef('Resize', version=1, is_legacy=True) + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode') .add_enum_alias('Format', 'ConvolutionV0', default=1)) -(pdef('Remap', version=0) - .add_enum_alias('InterpolationMode', 'WarpPerspective', name_field='imode') - .add_enum_alias('BorderMode', 'WarpPerspective', name_field='border_type') +(pdef('Resize', version=2) + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode') + .add_enum_alias('Format', 'Convolution', default=1)) + +(pdef('Remap', version=0,is_legacy=True) + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode') + .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_type') .add_enum_alias('Format', 'ConvolutionV0', default=1) .add_fields('float32', 'scalar', '0.f')) +(pdef('Remap', version=1) + .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode') + .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_type') + .add_enum_alias('Format', 'Convolution', default=1) + .add_fields('float32', 'scalar', '0.f')) + (pdef('Convolution3D'). add_enum('Mode', 'CROSS_CORRELATION', 'CONVOLUTION'). add_fields( @@ -879,13 +968,19 @@ when the ``I`` suffix is present. ) -(pdef('SeparableFilter'). +(pdef('SeparableFilter', version=0, is_legacy=True). add_enum_alias('Format', 'ConvolutionV0'). - add_enum_alias('BorderMode', 'WarpPerspective'). + add_enum_alias('BorderMode', 'WarpPerspectiveV1'). + add_fields('bool', 'is_symm_kernel', 'true'). + add_fields('uint32', 'ksize_h', 3, 'ksize_w', 3, 'anchor_h', 1, 'anchor_w', 1)) + +(pdef('SeparableFilter', version=1). + add_enum_alias('Format', 'Convolution'). + add_enum_alias('BorderMode', 'WarpPerspectiveV1'). add_fields('bool', 'is_symm_kernel', 'true'). add_fields('uint32', 'ksize_h', 3, 'ksize_w', 3, 'anchor_h', 1, 'anchor_w', 1)) -(pdef('LocalShare', 'Local share convolution'). +(pdef('LocalShare', 'Local share convolution',version=0, is_legacy=True). add_enum_alias('Mode', 'ConvolutionV0'). add_fields( 'uint32', @@ -902,10 +997,31 @@ when the ``I`` suffix is present. ). add_enum_alias('Sparse', 'ConvolutionV0'). add_enum_alias('Format', 'ConvolutionV0'). - add_enum_alias('ComputeMode', 'Convolution') + add_enum_alias('ComputeMode', 'ConvolutionV1') ) -(pdef('ROIAlign'). +(pdef('LocalShare', 'Local share convolution', version=1). + add_enum_alias('Mode', 'ConvolutionV0'). + add_fields( + 'uint32', + Doc('pad_h', 'padding on one side on the first dimension'), 0, + Doc('pad_w', 'padding on one side on the second dimension'), 0, + Doc('stride_h', 'kernel stride on the first dimension'), 1, + Doc('stride_w', 'kernel stride on the second dimension'), 1, + Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1, + Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1, + Doc('spatial_groups_h', 'spatial groups on the first dimension'), 1, + Doc('spatial_groups_w', 'spatial groups on the second dimension'), 1 + ). + add_enum_alias('Sparse', 'ConvolutionV0'). + add_enum_alias('Format', 'Convolution'). + add_enum_alias('ComputeMode', 'ConvolutionV1') + ) + + +(pdef('ROIAlign',version=0,is_legacy=True). add_enum('Mode', 'MAX', 'AVERAGE', name_field='mode'). add_enum_alias('Format', 'ConvolutionV0'). add_fields('float32', 'spatial_scale', '1.0'). @@ -916,6 +1032,19 @@ when the ``I`` suffix is present. 'sample_height', '2', 'sample_width', '2') ) + +(pdef('ROIAlign', version=1). + add_enum_alias('Mode', 'ROIAlignV0', name_field='mode'). + add_enum_alias('Format', 'Convolution'). + add_fields('float32', 'spatial_scale', '1.0'). + add_fields('float32', 'offset', '0.0'). + add_fields('uint32', + 'pooled_height', '1', + 'pooled_width', '1', + 'sample_height', '2', + 'sample_width', '2') + ) + (pdef('DeformablePSROIPooling'). add_fields('bool', 'no_trans', 'true'). add_fields('float32', 'spatial_scale', 1, @@ -926,7 +1055,7 @@ when the ``I`` suffix is present. Doc('part_size', 'size of each deformable part'), 1, Doc('sample_per_part', 'sample count of each bbox'), 1)) -(pdef('BatchConvBias', 'Batch convolution (unshare weights on the batch dimension)'). +(pdef('BatchConvBias', 'Batch convolution (unshare weights on the batch dimension)',version=0,is_legacy=True). add_enum_alias('NonlineMode', 'ConvBiasV0'). add_enum_alias('Mode', 'ConvolutionV0'). add_fields( @@ -942,8 +1071,28 @@ when the ``I`` suffix is present. ). add_enum_alias('Sparse', 'ConvolutionV0'). add_enum_alias('Format', 'ConvolutionV0'). - add_enum_alias('ComputeMode', 'Convolution', name_field="compute_mode") + add_enum_alias('ComputeMode', 'ConvolutionV1', name_field="compute_mode") ) + +(pdef('BatchConvBias', 'Batch convolution (unshare weights on the batch dimension)',version=1). + add_enum_alias('NonlineMode', 'ConvBiasV0'). + add_enum_alias('Mode', 'ConvolutionV0'). + add_fields( + 'uint32', + Doc('pad_h', 'padding on one side on the first dimension'), 0, + Doc('pad_w', 'padding on one side on the second dimension'), 0, + Doc('stride_h', 'kernel stride on the first dimension'), 1, + Doc('stride_w', 'kernel stride on the second dimension'), 1, + Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1, + Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) ' + 'on the second dimension'), 1, + ). + add_enum_alias('Sparse', 'ConvolutionV0'). + add_enum_alias('Format', 'Convolution'). + add_enum_alias('ComputeMode', 'ConvolutionV1', name_field="compute_mode") + ) + (pdef('FakeQuant'). add_fields('int32','qmin','-2147483648'). add_fields('int32','qmax','2147483647') diff --git a/dnn/src/cuda/convolution/opr_impl.cpp b/dnn/src/cuda/convolution/opr_impl.cpp index 66ea1c275..725103be2 100644 --- a/dnn/src/cuda/convolution/opr_impl.cpp +++ b/dnn/src/cuda/convolution/opr_impl.cpp @@ -68,7 +68,6 @@ ConvolutionForwardImpl::conv_bias_extra_data(const TensorLayout& src, conv_param.stride_w, conv_param.dilate_h, conv_param.dilate_w, - 0, conv_param.compute_mode}; ret.convbias_opr->execution_policy() = {this->execution_policy().algo}; return ret; diff --git a/dnn/src/x86/conv_bias/opr_impl.cpp b/dnn/src/x86/conv_bias/opr_impl.cpp index aa0180c58..b95fbd3bf 100644 --- a/dnn/src/x86/conv_bias/opr_impl.cpp +++ b/dnn/src/x86/conv_bias/opr_impl.cpp @@ -173,7 +173,6 @@ SmallVector ConvBiasImpl::suggest_algo_category_order( auto FH = param.filter_meta.spatial[0]; auto FW = param.filter_meta.spatial[1]; //! TODO: now winograd only support fast-run - //! nchw88 use mkl-dnn which algo is direct if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { return {AlgoCategory::DIRECT, AlgoCategory::IM2COL}; diff --git a/imperative/src/test/imperative.cpp b/imperative/src/test/imperative.cpp index 4151dde3a..5a5ccf071 100644 --- a/imperative/src/test/imperative.cpp +++ b/imperative/src/test/imperative.cpp @@ -35,7 +35,7 @@ TEST(TestImperative, APlusB) { } TEST(TestImperative, Convolution) { - auto op = OprAttr::make("ConvolutionV1"); + auto op = OprAttr::make("ConvolutionV2"); auto&& attr = op->cast_final_safe(); using Param = opr::Convolution::Param; using Policy = opr::Convolution::ExecutionPolicy; diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp index 881a7c548..e8c3c988a 100644 --- a/src/gopt/impl/inference.cpp +++ b/src/gopt/impl/inference.cpp @@ -1752,7 +1752,6 @@ void FuseConvBiasNonlinPass::apply(OptState& state) const { param.stride_w, param.dilate_h, param.dilate_w, - 0, param.compute_mode}; }; diff --git a/src/gopt/impl/tensor_reformat.cpp b/src/gopt/impl/tensor_reformat.cpp index 0050c65ad..9a8cd4515 100644 --- a/src/gopt/impl/tensor_reformat.cpp +++ b/src/gopt/impl/tensor_reformat.cpp @@ -1945,7 +1945,7 @@ void EnableNchwxxPass::fill_opr_convert_fun(size_t pack_c_size) { megdnn::param::ConvBias::Format conv_bias_format = megdnn::param::ConvBias::Format::NCHW88; megdnn::param::Convolution::Format conv_format = - megdnn::param::ConvolutionV0::Format::NCHW88; + megdnn::param::Convolution::Format::NCHW88; megdnn::param::Pooling::Format pooling_format = megdnn::param::Pooling::Format::NCHW88; std::string convter_pass_name = "conv_format_nchw88"; @@ -1958,7 +1958,7 @@ void EnableNchwxxPass::fill_opr_convert_fun(size_t pack_c_size) { src_to_nchwxx_mode = RelayoutMode::NCHW_TO_NCHW4; src_to_nchw_mode = RelayoutMode::NCHW4_TO_NCHW; conv_bias_format = megdnn::param::ConvBias::Format::NCHW44; - conv_format = megdnn::param::ConvolutionV0::Format::NCHW44; + conv_format = megdnn::param::Convolution::Format::NCHW44; pooling_format = megdnn::param::Pooling::Format::NCHW44; convter_pass_name = "conv_format_nchw44"; } @@ -2360,7 +2360,7 @@ EnableNchw44DotPass::make_nchw44_dot_converter() { struct TestTransResult { TransType trans_type; RelayoutMode relayout_mod; - megdnn::param::ConvolutionV0::Format conv_format; + megdnn::param::Convolution::Format conv_format; }; constexpr size_t pack_c_size = 4_z; auto test_trans_nchw44_dot = diff --git a/src/opr/impl/dnn/dnn.oprdecl b/src/opr/impl/dnn/dnn.oprdecl index a78d418bf..6c05cb70f 100644 --- a/src/opr/impl/dnn/dnn.oprdecl +++ b/src/opr/impl/dnn/dnn.oprdecl @@ -18,7 +18,7 @@ decl_opr('Convolution', params=[('param', 'Convolution'), ('execution_polity', 'ExecutionPolicy')], desc='batched convolution on channeled 2D images', - version=1, has_out_dtype=True) + version=2, has_out_dtype=True) decl_opr('ConvolutionBackwardData', pyname='deconvolution_v0', @@ -51,7 +51,7 @@ decl_opr('ConvolutionBackwardData', ], desc='batched deconvolution on channeled 2D images; the underlying ' 'computation is in fact gradient of convolution w.r.t. data', - version=1) + version=2) decl_opr('MaskConvolution', inputs=[Doc('src', @@ -138,14 +138,14 @@ decl_opr('LRN', decl_opr('Pooling', inputs=['src'], - params='Pooling') + params='Pooling',version=1) decl_opr('AdaptivePooling', inputs=[Doc('src', 'input image, shape (n, c, ih, iw)'), Doc('out_shape', 'output image shape, containing two elements specifying output height and width.')], params='AdaptivePooling', desc='Adaptive Pooling.' - 'The output shape is (n, c, oh, ow), where (oh, ow) is given by *out_shape*.') + 'The output shape is (n, c, oh, ow), where (oh, ow) is given by *out_shape*.',version=1) decl_opr('ROIPooling', outputs=[0], inputs=[Doc('src', 'input image, shape (n, c, ih, iw)'), @@ -215,7 +215,7 @@ decl_opr('ConvBiasForward', ('execution_policy', 'ExecutionPolicy')], desc=('activation(convolution(src, filter) + bias) with specified ' 'dtype'), - version=3, has_out_dtype=True) + version=4, has_out_dtype=True) decl_opr('BatchNorm', pyname='batch_norm', @@ -255,7 +255,7 @@ r""" & iw=-pad_w+ow \\times stride_w \\\\ & grp_h = oh / (OH / spatial_groups_h) \\\\ & grp_w = ow / (OW / spatial_groups_w) -"""), +"""), version=1, has_out_dtype=True) decl_opr('ROIAlign', outputs=[0], @@ -270,7 +270,7 @@ decl_opr('ROIAlign', outputs=[0], desc='ROI Align, see ' 'Mask-RCNN: https://arxiv.org/pdf/1703.06870.pdf, ' 'The output shape is (m, c, pooled_height, pooled_width), where (pooled_height, pooled_width) is given by ' - '*Param*.') + '*Param*.',version=1) decl_opr('DeformableConvForward', pyname='deformable_conv', @@ -312,7 +312,7 @@ r""" * filter_{n, oc, ic, kh, kw} \\\\ \\text{where} & ih=-pad_h+oh \\times stride_h \\\\ & iw=-pad_w+ow \\times stride_w -"""), +"""), version=1, has_out_dtype=True) decl_opr('FakeQuant', diff --git a/src/opr/impl/dnn/dnn.sereg.h b/src/opr/impl/dnn/dnn.sereg.h index 65f1e1718..b99996569 100644 --- a/src/opr/impl/dnn/dnn.sereg.h +++ b/src/opr/impl/dnn/dnn.sereg.h @@ -22,10 +22,84 @@ #include "megbrain/opr/dnn/tqt.h" #include "megbrain/serialization/sereg.h" +#include "megdnn/opr_param_defs.h" +#include "megdnn/oprs/nn.h" namespace mgb { namespace serialization { + template + struct MakePoolingCaller1 { + template + static VarNode* make(const cg::VarNodeArray& inputs, + const typename MegDNNPooling::Param& param, + const OperatorNodeConfig& config) { + if (inputs.size() == 1) { + return Opr::make(inputs[0], param, config).node(); + } + return nullptr; + } + }; + + template + struct MakeROIAlignCaller1 { + template + static VarNode* make(const cg::VarNodeArray& inputs, + const typename MegDNNROIALIGN::Param& param, + const OperatorNodeConfig& config) { + if (inputs.size() == 2) { + return Opr::make(inputs[0],inputs[1], param, config).node(); + } else { + return nullptr; + } + } + }; + + template + struct MakeROIAlignCaller4 { + template + static VarNode* make(const cg::VarNodeArray& inputs, + const typename MegDNNROIALIGN::Param& param, + const OperatorNodeConfig& config) { + if (inputs.size() == 4) { + return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], + param, config) + .node(); + } else { + return nullptr; + } + } + }; + + template + struct MakePoolingBackwardCaller3 { + template + static VarNode* make(const cg::VarNodeArray& inputs, + const typename MegDNNPooling::Param& param, + const OperatorNodeConfig& config) { + if (inputs.size() == 3) { + return Opr::make(inputs[0], inputs[1], inputs[2], param, config) + .node(); + } + return nullptr; + } + }; + + template + struct MakeAdaptivePoolingBackwardCaller3 { + template + static VarNode* make(const cg::VarNodeArray& inputs, + const typename MegDNNPooling::Param& param, + const OperatorNodeConfig& config) { + if (inputs.size() == 4) { + return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], + param, config) + .node(); + } + return nullptr; + } + }; + template struct MakeConvCaller2 { template @@ -41,6 +115,7 @@ namespace serialization { return nullptr; } }; + template struct MakeConvCaller3 { template @@ -56,6 +131,7 @@ namespace serialization { return nullptr; } }; + template struct MakeConvCaller4 { template @@ -71,6 +147,7 @@ namespace serialization { return nullptr; } }; + template struct MakeConvCaller5 { template @@ -141,6 +218,75 @@ namespace serialization { } }; + template + struct PoolingLoadDumpImpl { + static void dump(OprDumpContext& ctx, + const cg::OperatorNodeBase& opr_) { + auto&& opr = opr_.cast_final_safe(); + ctx.write_param(opr.param()); + } + + static VarNode* make( + const cg::VarNodeArray& inputs, const PoolingParam& param, + const OperatorNodeConfig& config) { + VarNode* ret = Maker0::template make(inputs, param, + config); + mgb_assert(ret); + return ret; + } + + static cg::OperatorNodeBase* load(OprLoadContext& ctx, + const cg::VarNodeArray& inputs, + const OperatorNodeConfig& config) { + auto param = ctx.read_param(); + return make(inputs, param, config)->owner_opr(); + } + }; + + template<> + struct OprLoadDumpImpl: + public PoolingLoadDumpImpl, + megdnn::param::AdaptivePooling> + {}; + + template<> + struct OprLoadDumpImpl: + public PoolingLoadDumpImpl, + megdnn::param::AdaptivePooling> + {}; + + template<> + struct OprLoadDumpImpl: + public PoolingLoadDumpImpl, + megdnn::param::ROIAlign> + {}; + + template<> + struct OprLoadDumpImpl: + public PoolingLoadDumpImpl, + megdnn::param::ROIAlign> + {}; + + template<> + struct OprLoadDumpImpl: + public PoolingLoadDumpImpl, + megdnn::param::Pooling> + {}; + + template<> + struct OprLoadDumpImpl: + public PoolingLoadDumpImpl, + megdnn::param::Pooling> + {}; + + template<> struct OprLoadDumpImpl: public ConvLoadDumpImpl #include "megbrain/opr/imgproc.h" #include "megbrain/serialization/sereg.h" +#include "megdnn/opr_param_defs.h" namespace mgb { namespace serialization { @@ -38,6 +40,63 @@ namespace serialization { } }; + template <> + struct OprMaker { + using Opr = opr::Remap; + using Param = Opr::Param; + static cg::OperatorNodeBase* make(const Param& param, + const cg::VarNodeArray& inputs, + ComputingGraph& graph, + const OperatorNodeConfig& config) { + MGB_MARK_USED_VAR(graph); + if (inputs.size() == 2) { + return Opr::make(inputs[0], inputs[1], param, config) + .node() + ->owner_opr(); + } else { + return nullptr; + } + } + }; + + template<> + struct OprMaker { + using Opr = opr::RemapBackwardMat; + using Param = Opr::Param; + static cg::OperatorNodeBase* make(const Param& param, + const cg::VarNodeArray& inputs, + ComputingGraph& graph, + const OperatorNodeConfig& config) { + MGB_MARK_USED_VAR(graph); + if (inputs.size() == 3) { + return Opr::make(inputs[0], inputs[1], inputs[2], param, config) + .node() + ->owner_opr(); + } else { + return nullptr; + } + } + }; + + template<> + struct OprMaker { + using Opr = opr::RemapBackwardData; + using Param = Opr::Param; + static cg::OperatorNodeBase* make(const Param& param, + const cg::VarNodeArray& inputs, + ComputingGraph& graph, + const OperatorNodeConfig& config) { + MGB_MARK_USED_VAR(graph); + if (inputs.size() == 3) { + return Opr::make(inputs[0], inputs[1], inputs[2], param, config) + .node() + ->owner_opr(); + } else { + return nullptr; + } + } + }; + template <> struct OprMaker { using Opr = opr::DctChannelSelectForward; @@ -106,29 +165,35 @@ namespace serialization { } // namespace serialization namespace opr { - - MGB_SEREG_OPR(WarpPerspective, 0); - MGB_SEREG_OPR(WarpPerspectiveBackwardData, 0); - MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 0); + using WarpPerspectiveV2=WarpPerspective; + using WarpPerspectiveBackwardDataV2=WarpPerspectiveBackwardData; + using WarpPerspectiveBackwardMatV2=WarpPerspectiveBackwardMat; + MGB_SEREG_OPR(WarpPerspectiveV2, 0); + MGB_SEREG_OPR(WarpPerspectiveBackwardDataV2, 0); + MGB_SEREG_OPR(WarpPerspectiveBackwardMatV2, 0); MGB_SEREG_OPR(Rotate, 1); MGB_SEREG_OPR(CvtColor, 1); MGB_SEREG_OPR(GaussianBlur, 1); MGB_SEREG_OPR(ResizeBackward, 2); - MGB_SEREG_OPR(Remap, 2); - MGB_SEREG_OPR(RemapBackwardData, 3); - MGB_SEREG_OPR(RemapBackwardMat, 3); + using RemapV1=Remap; + using RemapBackwardDataV1=RemapBackwardData; + using RemapBackwardMatV1=RemapBackwardMat; + MGB_SEREG_OPR(RemapV1, 2); + MGB_SEREG_OPR(RemapBackwardDataV1, 3); + MGB_SEREG_OPR(RemapBackwardMatV1, 3); //! current warp affine version - using WarpAffineV1 = opr::WarpAffine; - MGB_SEREG_OPR(WarpAffineV1, 3); + using WarpAffineV2 = opr::WarpAffine; + MGB_SEREG_OPR(WarpAffineV2, 3); //! current resize version - using ResizeV1 = opr::Resize; - MGB_SEREG_OPR(ResizeV1, 2); + using ResizeV2 = opr::Resize; + MGB_SEREG_OPR(ResizeV2, 2); - MGB_SEREG_OPR(DctChannelSelect, 0); + using DctChannelSelectV1 = opr::DctChannelSelect; + MGB_SEREG_OPR(DctChannelSelectV1, 0); } // namespace opr diff --git a/src/serialization/impl/opr_registry.cpp b/src/serialization/impl/opr_registry.cpp index 4c1df5561..9bd9fd08c 100644 --- a/src/serialization/impl/opr_registry.cpp +++ b/src/serialization/impl/opr_registry.cpp @@ -71,7 +71,6 @@ namespace { void OprRegistry::add(const OprRegistry& record) { auto&& sd = static_data(); - auto persist_id = record.persist_type_id; auto registry_ins = sd.id2reg.emplace(persist_id, record); mgb_assert(registry_ins.second || -- GitLab