From 0b32056828821b85506c0524f3312e8cbc2e9ce2 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Fri, 15 May 2020 13:59:57 +0800 Subject: [PATCH] feat(dnn/naive): support weight preprocess interface in dnn GitOrigin-RevId: 84791aacf9ed53214fccaf074e67fd1460383380 --- dnn/include/megdnn/oprs/nn.h | 8 +- dnn/src/common/conv_bias.cpp | 2 +- dnn/src/common/convolution.cpp | 3 +- dnn/src/cuda/conv_bias/bfloat16.cpp | 2 +- dnn/src/cuda/conv_bias/opr_impl.cpp | 4 +- dnn/src/cuda/conv_bias/opr_impl.h | 29 ++++- dnn/src/cuda/convolution/opr_impl.cpp | 18 ++- dnn/src/cuda/convolution/opr_impl.h | 33 +++-- dnn/src/cuda/mask_conv/opr_impl.cpp | 2 +- dnn/src/cuda/mask_conv/opr_impl.h | 2 +- dnn/src/fallback/conv_bias/opr_impl.cpp | 21 ++-- dnn/src/fallback/conv_bias/opr_impl.h | 6 +- dnn/src/fallback/convolution/opr_impl.cpp | 13 +- dnn/src/fallback/convolution/opr_impl.h | 6 +- dnn/src/naive/conv_bias/opr_impl.cpp | 4 +- dnn/src/naive/conv_bias/opr_impl.h | 35 +++++- dnn/src/naive/convolution/convolution.cpp | 51 ++++---- dnn/src/naive/convolution/opr_impl.h | 30 ++++- dnn/src/naive/convpooling/conv_pooling.cpp | 2 +- dnn/src/naive/mask_conv/opr_impl.cpp | 4 +- dnn/src/naive/separable_conv/opr_impl.cpp | 2 +- .../arm_common/conv_bias_multi_thread.cpp | 5 +- dnn/test/common/conv_bias.cpp | 4 +- dnn/test/common/opr_proxy.h | 118 +++++++++++++++++- dnn/test/cpu/mask_conv.cpp | 4 +- dnn/test/cuda/chanwise_convolution.cpp | 42 ++++++- dnn/test/dispatcher/null.cpp | 4 +- dnn/test/naive/conv_bias.cpp | 4 +- dnn/test/naive/convolution.cpp | 5 +- dnn/test/x86/conv_bias.cpp | 7 +- 30 files changed, 372 insertions(+), 98 deletions(-) diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h index 4d8e2418..78afc2ea 100644 --- a/dnn/include/megdnn/oprs/nn.h +++ b/dnn/include/megdnn/oprs/nn.h @@ -210,21 +210,25 @@ public: _megdnn_tensor_out dst, const PreprocessedFilter* preprocessed_filter, _megdnn_workspace workspace) = 0; + virtual void exec_preprocess(const TensorLayout& src_layout, _megdnn_tensor_in filter, const TensorLayout& dst_layout, PreprocessedFilter* preprocessed_filter, _megdnn_workspace workspace) = 0; void deduce_dtype(DType src, DType filter, DType& dst); + void deduce_layout(const TensorLayout& src, const TensorLayout& filter, TensorLayout& dst); virtual size_t get_workspace_in_bytes( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst, - PreprocessedFilter* preprocessed_filter) = 0; + const PreprocessedFilter* preprocessed_filter) = 0; + virtual SmallVector deduce_preprocessed_filter_layout( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst) = 0; + virtual size_t get_preprocess_workspace_in_bytes( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst) = 0; @@ -337,7 +341,7 @@ public: const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, const TensorLayout& dst, - PreprocessedFilter* preprocessed_filter) = 0; + const PreprocessedFilter* preprocessed_filter) = 0; virtual size_t get_preprocess_workspace_in_bytes( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, diff --git a/dnn/src/common/conv_bias.cpp b/dnn/src/common/conv_bias.cpp index 0bf79e0f..ee834fa8 100644 --- a/dnn/src/common/conv_bias.cpp +++ b/dnn/src/common/conv_bias.cpp @@ -76,7 +76,7 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( auto ret = check_layout_fwd(src, filter, dst); megdnn_assert_contiguous(bias); auto required_workspace_in_bytes = - get_workspace_in_bytes(src, filter, bias, z, dst); + get_workspace_in_bytes(src, filter, bias, z, dst, nullptr); megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); if (bias.ndim != 0) { //! bias.layout == dst.layout failed, no assert information diff --git a/dnn/src/common/convolution.cpp b/dnn/src/common/convolution.cpp index 50166749..9b8140d7 100644 --- a/dnn/src/common/convolution.cpp +++ b/dnn/src/common/convolution.cpp @@ -981,7 +981,8 @@ ConvolutionForward::CanonizedFilterMeta ConvolutionForward::check_exec( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst, size_t workspace_in_bytes) { auto ret = check_layout_fwd(src, filter, dst); - auto required_workspace_in_bytes = get_workspace_in_bytes(src, filter, dst); + auto required_workspace_in_bytes = + get_workspace_in_bytes(src, filter, dst, nullptr); megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); return ret; } diff --git a/dnn/src/cuda/conv_bias/bfloat16.cpp b/dnn/src/cuda/conv_bias/bfloat16.cpp index aa70ea87..283ffd8f 100644 --- a/dnn/src/cuda/conv_bias/bfloat16.cpp +++ b/dnn/src/cuda/conv_bias/bfloat16.cpp @@ -112,7 +112,7 @@ void ConvBiasForwardImpl::AlgoBFloat16::exec(const ExecArgs& args) const { convbias_opr->param().compute_mode = Param::ComputeMode::DEFAULT; convbias_opr->execution_policy() = {m_impl}; convbias_opr->exec(fsrc_tensor, ffilter_tensor, fbias_tensor, fz_tensor, - fdst_tensor, cvter.workspace()); + fdst_tensor, nullptr, cvter.workspace()); } { cvter.comp_to_dst_type(fdst_tensor, *args.dst_tensor); } } diff --git a/dnn/src/cuda/conv_bias/opr_impl.cpp b/dnn/src/cuda/conv_bias/opr_impl.cpp index c8b91ca1..f96a64bf 100644 --- a/dnn/src/cuda/conv_bias/opr_impl.cpp +++ b/dnn/src/cuda/conv_bias/opr_impl.cpp @@ -25,6 +25,7 @@ namespace cuda { void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, _megdnn_tensor_out dst, + const PreprocessedFilter*, _megdnn_workspace workspace) { check_exec(src.layout, filter.layout, bias.layout, z.layout, dst.layout, workspace.size); @@ -208,7 +209,8 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, - const TensorLayout& dst) { + const TensorLayout& dst, + const PreprocessedFilter*) { AlgoBase::SizeArgs args{this, src, filter, bias, z, dst}; return get_algorithm(this, src, filter, bias, z, dst) ->get_workspace_in_bytes(args); diff --git a/dnn/src/cuda/conv_bias/opr_impl.h b/dnn/src/cuda/conv_bias/opr_impl.h index a5fcaeda..67489134 100644 --- a/dnn/src/cuda/conv_bias/opr_impl.h +++ b/dnn/src/cuda/conv_bias/opr_impl.h @@ -20,7 +20,9 @@ public: using ConvBiasForward::ConvBiasForward; void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, - _megdnn_tensor_out dst, _megdnn_workspace workspace) override; + _megdnn_tensor_out dst, + const PreprocessedFilter* preprocessed_filter, + _megdnn_workspace workspace) override; std::vector get_all_algorithms( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, @@ -34,7 +36,30 @@ public: bool reproducible) override; size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, const TensorLayout&, const TensorLayout&, - const TensorLayout&) override; + const TensorLayout&, + const PreprocessedFilter*) override; + + size_t get_preprocess_workspace_in_bytes(const TensorLayout&, + const TensorLayout&, + const TensorLayout&, + const TensorLayout&, + const TensorLayout&) override { + return 0; + }; + SmallVector deduce_preprocessed_filter_layout( + const TensorLayout&, const TensorLayout&, const TensorLayout&, + const TensorLayout&, const TensorLayout&) override { + return {}; + } + void exec_preprocess(const TensorLayout& , + _megdnn_tensor_in , + const TensorLayout& , + const TensorLayout& , + const TensorLayout& , + PreprocessedFilter* , + _megdnn_workspace ) override { + megdnn_throw("cuda conv_bias exec_preprocess has not implemeted yet"); + } const char* get_algorithm_set_name() const override; diff --git a/dnn/src/cuda/convolution/opr_impl.cpp b/dnn/src/cuda/convolution/opr_impl.cpp index 7832a0b4..f04fbdc2 100644 --- a/dnn/src/cuda/convolution/opr_impl.cpp +++ b/dnn/src/cuda/convolution/opr_impl.cpp @@ -73,22 +73,32 @@ ConvolutionForwardImpl::get_all_algorithms(const TensorLayout& src, size_t ConvolutionForwardImpl::get_workspace_in_bytes( const TensorLayout& src, const TensorLayout& filter, - const TensorLayout& dst) { + const TensorLayout& dst, + const PreprocessedFilter* preprocessed_filter) { auto extra_data = conv_bias_extra_data(dst); return static_cast(extra_data.convbias_opr.get()) - ->get_workspace_in_bytes(src, filter, extra_data.bias_layout, - extra_data.z_layout, dst); + ->get_workspace_in_bytes( + src, filter, extra_data.bias_layout, extra_data.z_layout, + dst, + reinterpret_cast::PreprocessedFilter*>( + preprocessed_filter)); } void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_out dst, + const PreprocessedFilter* preprocessed_filter, _megdnn_workspace workspace) { auto extra_data = conv_bias_extra_data(dst.layout); TensorND bias(nullptr, extra_data.bias_layout); TensorND z(nullptr, extra_data.z_layout); return static_cast(extra_data.convbias_opr.get()) - ->exec(src, filter, bias, z, dst, workspace); + ->exec(src, filter, bias, z, dst, + reinterpret_cast::PreprocessedFilter*>( + preprocessed_filter), + workspace); } const char* ConvolutionForwardImpl::get_algorithm_set_name() const { diff --git a/dnn/src/cuda/convolution/opr_impl.h b/dnn/src/cuda/convolution/opr_impl.h index e8c73cec..4326d777 100644 --- a/dnn/src/cuda/convolution/opr_impl.h +++ b/dnn/src/cuda/convolution/opr_impl.h @@ -11,6 +11,7 @@ #pragma once #include "megdnn/oprs/nn.h" +#include "src/common/utils.h" namespace megdnn { namespace cuda { @@ -18,10 +19,11 @@ namespace cuda { class ConvolutionForwardImpl: public ConvolutionForward { public: using ConvolutionForward::ConvolutionForward; - void exec(_megdnn_tensor_in src, - _megdnn_tensor_in filter, - _megdnn_tensor_out dst, - _megdnn_workspace workspace) override; + void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, + _megdnn_tensor_out dst, + const PreprocessedFilter* preprocessed_filter, + _megdnn_workspace workspace) override; + std::vector get_all_algorithms(const TensorLayout &src, const TensorLayout &filter, const TensorLayout &dst) override; @@ -30,11 +32,28 @@ class ConvolutionForwardImpl: public ConvolutionForward { const TensorLayout& dst, size_t workspace_limit_in_bytes, bool reproducible) override; - size_t get_workspace_in_bytes(const TensorLayout& src, - const TensorLayout& filter, - const TensorLayout& dst) override; + size_t get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& filter, + const TensorLayout& dst, + const PreprocessedFilter* preprocessed_filter) override; const char* get_algorithm_set_name() const override; + SmallVector deduce_preprocessed_filter_layout( + const TensorLayout&, const TensorLayout&, + const TensorLayout&) override { + return {}; + } + size_t get_preprocess_workspace_in_bytes( + const TensorLayout& , const TensorLayout& , + const TensorLayout& ) override{ + return 0; + } + void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, + const TensorLayout&, PreprocessedFilter*, + _megdnn_workspace) override { + megdnn_throw("cuda exec_preprocess has not implemeted yet"); + } + protected: struct ConvBiasExtraData{ std::unique_ptr convbias_opr; diff --git a/dnn/src/cuda/mask_conv/opr_impl.cpp b/dnn/src/cuda/mask_conv/opr_impl.cpp index 6c34f0d0..3d7e7691 100644 --- a/dnn/src/cuda/mask_conv/opr_impl.cpp +++ b/dnn/src/cuda/mask_conv/opr_impl.cpp @@ -27,7 +27,7 @@ void MaskConvForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_workspace workspace) { megdnn_assert(dst.layout.dtype.enumv() == DTypeTrait::enumv, "Mask conv only support Float32 dtype."); - m_conv_opr->exec(src, filter, dst, workspace); + m_conv_opr->exec(src, filter, dst, nullptr, workspace); auto stream = cuda_stream(handle()); #define cb(DType) \ if (mask.layout.dtype == DType()) { \ diff --git a/dnn/src/cuda/mask_conv/opr_impl.h b/dnn/src/cuda/mask_conv/opr_impl.h index a9b5e53b..39e170e6 100644 --- a/dnn/src/cuda/mask_conv/opr_impl.h +++ b/dnn/src/cuda/mask_conv/opr_impl.h @@ -30,7 +30,7 @@ public: const TensorLayout& dst) override { MEGDNN_MARK_USED_VAR(mask); m_conv_opr->param() = param(); - return m_conv_opr->get_workspace_in_bytes(src, filter, dst); + return m_conv_opr->get_workspace_in_bytes(src, filter, dst, nullptr); } private: diff --git a/dnn/src/fallback/conv_bias/opr_impl.cpp b/dnn/src/fallback/conv_bias/opr_impl.cpp index bb699d47..0a52b5f8 100644 --- a/dnn/src/fallback/conv_bias/opr_impl.cpp +++ b/dnn/src/fallback/conv_bias/opr_impl.cpp @@ -95,7 +95,9 @@ bool ConvBiasImpl::is_naive_algo(ConvBiasImpl::Algorithm* algo) { } void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, - _megdnn_tensor_out dst, _megdnn_workspace workspace) { + _megdnn_tensor_out dst, + const PreprocessedFilter* preprocessed_filter, + _megdnn_workspace workspace) { check_exec(src.layout, filter.layout, bias.layout, z.layout, dst.layout, workspace.size); auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace); @@ -104,20 +106,21 @@ void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, ncb_algo_get_workspace(algo, fparam) <= workspace.size) { exec_with_ncb_kern(fparam, algo); } else { - naive::ConvBiasForwardImpl::exec(src, filter, bias, z, dst, workspace); + naive::ConvBiasForwardImpl::exec(src, filter, bias, z, dst, + preprocessed_filter, workspace); } } -size_t ConvBiasImpl::get_workspace_in_bytes(const TensorLayout& src, - const TensorLayout& filter, - const TensorLayout& bias, - const TensorLayout& z, - const TensorLayout& dst) { +size_t ConvBiasImpl::get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& filter, + const TensorLayout& bias, const TensorLayout& z, + const TensorLayout& dst, + const PreprocessedFilter* preprocessed_filter) { auto fparam = make_ncb_kern_size_param(src, filter, bias, dst); ConvBiasImpl::Algorithm* algo = get_algorithm(fparam); if (is_naive_algo(algo)) { - return naive::ConvBiasForwardImpl::get_workspace_in_bytes(src, filter, - bias, z, dst); + return naive::ConvBiasForwardImpl::get_workspace_in_bytes( + src, filter, bias, z, dst, preprocessed_filter); } else { return ncb_algo_get_workspace(algo, fparam); } diff --git a/dnn/src/fallback/conv_bias/opr_impl.h b/dnn/src/fallback/conv_bias/opr_impl.h index b8ecbc4f..1dc33c78 100644 --- a/dnn/src/fallback/conv_bias/opr_impl.h +++ b/dnn/src/fallback/conv_bias/opr_impl.h @@ -41,14 +41,16 @@ public: //! implemented by exec_with_ncb_kern() void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, - _megdnn_tensor_out dst, _megdnn_workspace workspace) override; + _megdnn_tensor_out dst, const PreprocessedFilter*, + _megdnn_workspace workspace) override; //! implemented by get_workspace_with_ncb() size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, - const TensorLayout& dst) override; + const TensorLayout& dst, + const PreprocessedFilter*) override; //! implemented by get_all_algorithms_with_ncb() std::vector get_all_algorithms( diff --git a/dnn/src/fallback/convolution/opr_impl.cpp b/dnn/src/fallback/convolution/opr_impl.cpp index a3f10930..707623cb 100644 --- a/dnn/src/fallback/convolution/opr_impl.cpp +++ b/dnn/src/fallback/convolution/opr_impl.cpp @@ -82,6 +82,7 @@ bool ConvolutionImpl::is_naive_algo(ConvolutionImpl::Algorithm* algo) { } void ConvolutionImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_out dst, + const PreprocessedFilter* preprocessed_filter, _megdnn_workspace workspace) { auto fparam = make_ncb_kern_param(src, filter, dst, workspace); ConvolutionImpl::Algorithm* algo = get_algorithm(fparam, workspace.size); @@ -89,18 +90,20 @@ void ConvolutionImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, ncb_algo_get_workspace(algo, fparam) <= workspace.size) { exec_with_ncb_kern(fparam, algo); } else { - naive::ConvolutionForwardImpl::exec(src, filter, dst, workspace); + naive::ConvolutionForwardImpl::exec(src, filter, dst, + preprocessed_filter, workspace); } } -size_t ConvolutionImpl::get_workspace_in_bytes(const TensorLayout& src, - const TensorLayout& filter, - const TensorLayout& dst) { +size_t ConvolutionImpl::get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& filter, + const TensorLayout& dst, + const PreprocessedFilter* preprocessed_filter) { auto fparam = make_ncb_kern_size_param(src, filter, dst); Algorithm* algo = get_algorithm(fparam); if (is_naive_algo(algo)) { return naive::ConvolutionForwardImpl::get_workspace_in_bytes( - src, filter, dst); + src, filter, dst, preprocessed_filter); } else { return ncb_algo_get_workspace(algo, fparam); } diff --git a/dnn/src/fallback/convolution/opr_impl.h b/dnn/src/fallback/convolution/opr_impl.h index 467e2879..42dad0d1 100644 --- a/dnn/src/fallback/convolution/opr_impl.h +++ b/dnn/src/fallback/convolution/opr_impl.h @@ -36,12 +36,14 @@ public: //! implemented by exec_with_ncb_kern() void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, - _megdnn_tensor_out dst, _megdnn_workspace workspace) override; + _megdnn_tensor_out dst, const PreprocessedFilter*, + _megdnn_workspace workspace) override; //! implemented by get_workspace_with_ncb() size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& filter, - const TensorLayout& dst) override; + const TensorLayout& dst, + const PreprocessedFilter*) override; //! implemented by get_all_algorithms_with_ncb() std::vector get_all_algorithms( diff --git a/dnn/src/naive/conv_bias/opr_impl.cpp b/dnn/src/naive/conv_bias/opr_impl.cpp index feb832bb..ffa2190d 100644 --- a/dnn/src/naive/conv_bias/opr_impl.cpp +++ b/dnn/src/naive/conv_bias/opr_impl.cpp @@ -54,7 +54,8 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& flt, const TensorLayout& bias, const TensorLayout& z, - const TensorLayout& dst) { + const TensorLayout& dst, + const PreprocessedFilter*) { size_t float_workspace_size = 0; if (z.ndim > 0 && z.dtype.category() != DTypeCategory::FLOAT) { @@ -79,6 +80,7 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes(const TensorLayout& src, void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, _megdnn_tensor_out dst, + const PreprocessedFilter*, _megdnn_workspace workspace) { MIDOUT_BEGIN(megdnn_naive_conv_bias_fwd) { dt_byte *workspace_ptr = workspace.raw_ptr; diff --git a/dnn/src/naive/conv_bias/opr_impl.h b/dnn/src/naive/conv_bias/opr_impl.h index a7a43163..b410d10f 100644 --- a/dnn/src/naive/conv_bias/opr_impl.h +++ b/dnn/src/naive/conv_bias/opr_impl.h @@ -22,7 +22,9 @@ public: using ConvBiasForward::ConvBiasForward; void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, - _megdnn_tensor_out dst, _megdnn_workspace workspace) override; + _megdnn_tensor_out dst, + const PreprocessedFilter* preprocessed_filter, + _megdnn_workspace workspace) override; std::vector get_all_algorithms( const TensorLayout& src, const TensorLayout& filter, @@ -37,11 +39,32 @@ public: size_t workspace_limit_in_bytes, bool reproducible) override; - size_t get_workspace_in_bytes(const TensorLayout& src, - const TensorLayout& filter, - const TensorLayout& bias, - const TensorLayout& z, - const TensorLayout& dst) override; + size_t get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& filter, + const TensorLayout& bias, const TensorLayout& z, + const TensorLayout& dst, + const PreprocessedFilter* preprocessed_filter) override; + + size_t get_preprocess_workspace_in_bytes(const TensorLayout&, + const TensorLayout&, + const TensorLayout&, + const TensorLayout&, + const TensorLayout&) override { + return 0; + } + SmallVector deduce_preprocessed_filter_layout( + const TensorLayout&, const TensorLayout&, const TensorLayout&, + const TensorLayout&, const TensorLayout&) override { + return {}; + } + + void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, + const TensorLayout&, const TensorLayout&, + const TensorLayout&, PreprocessedFilter*, + _megdnn_workspace) override{ + megdnn_throw("conv_bias exec_preprocess is not impl yet"); + } + const char* get_algorithm_set_name() const override; }; diff --git a/dnn/src/naive/convolution/convolution.cpp b/dnn/src/naive/convolution/convolution.cpp index 498d21a7..1b48101a 100644 --- a/dnn/src/naive/convolution/convolution.cpp +++ b/dnn/src/naive/convolution/convolution.cpp @@ -26,15 +26,14 @@ using namespace megdnn; using namespace naive; void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, - _megdnn_tensor_in filter, - _megdnn_tensor_out dst, - _megdnn_workspace workspace) -{ + _megdnn_tensor_in filter, + _megdnn_tensor_out dst, + const PreprocessedFilter*, + _megdnn_workspace workspace) { MIDOUT_BEGIN(megdnn_naive_conv_fwd) { - - auto filter_meta = check_exec( - src.layout, filter.layout, dst.layout, workspace.size); - using ComputeMode = Param::ComputeMode; + auto filter_meta = check_exec(src.layout, filter.layout, dst.layout, + workspace.size); + using ComputeMode = Param::ComputeMode; #define DISPATCH_CMODE(in_dt, out_dt, in_ct, out_ct, comp_ct, cmode) \ do { \ using namespace dtype; \ @@ -52,24 +51,28 @@ void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, #define cb(dt) \ DISPATCH(dt, dt, DTypeTrait
::ctype, DTypeTrait
::ctype, \ DTypeTrait
::ctype) - MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); + MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); #undef cb - DISPATCH(Int8, Int16, dt_int8, dt_int16, dt_int16); - DISPATCH(Int8, Int32, dt_int8, dt_int32, dt_int32); - DISPATCH(QuantizedS8, QuantizedS32, dt_int8, dt_int32, dt_int32); - MEGDNN_INC_FLOAT16(DISPATCH_CMODE(Float16, Float16, dt_float16, dt_float16, - dt_float32, ComputeMode::FLOAT32)); - MEGDNN_INC_FLOAT16(DISPATCH_CMODE(BFloat16, BFloat16, dt_bfloat16, - dt_bfloat16, dt_float32, - ComputeMode::FLOAT32)); - DISPATCH(Quantized8Asymm, QuantizedS32, dt_quint8, dt_qint32, dt_qint32); - DISPATCH(QuantizedS8, QuantizedS8, dt_int8, dt_int8, dt_int32); + DISPATCH(Int8, Int16, dt_int8, dt_int16, dt_int16); + DISPATCH(Int8, Int32, dt_int8, dt_int32, dt_int32); + DISPATCH(QuantizedS8, QuantizedS32, dt_int8, dt_int32, dt_int32); + MEGDNN_INC_FLOAT16(DISPATCH_CMODE(Float16, Float16, dt_float16, + dt_float16, dt_float32, + ComputeMode::FLOAT32)); + MEGDNN_INC_FLOAT16(DISPATCH_CMODE(BFloat16, BFloat16, dt_bfloat16, + dt_bfloat16, dt_float32, + ComputeMode::FLOAT32)); + DISPATCH(Quantized8Asymm, QuantizedS32, dt_quint8, dt_qint32, + dt_qint32); + DISPATCH(QuantizedS8, QuantizedS8, dt_int8, dt_int8, dt_int32); #undef DISPATCH - megdnn_throw(ssprintf("unsupported Conv(%s, %s) -> %s with cmode = %d", - src.layout.dtype.name(), filter.layout.dtype.name(), - dst.layout.dtype.name(), - static_cast(param().compute_mode))); - } MIDOUT_END(); + megdnn_throw(ssprintf("unsupported Conv(%s, %s) -> %s with cmode = %d", + src.layout.dtype.name(), + filter.layout.dtype.name(), + dst.layout.dtype.name(), + static_cast(param().compute_mode))); + } + MIDOUT_END(); } size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes(const TensorLayout& filter, diff --git a/dnn/src/naive/convolution/opr_impl.h b/dnn/src/naive/convolution/opr_impl.h index efe7a241..b7253be2 100644 --- a/dnn/src/naive/convolution/opr_impl.h +++ b/dnn/src/naive/convolution/opr_impl.h @@ -10,6 +10,7 @@ */ #pragma once #include "megdnn/oprs.h" +#include "src/common/utils.h" namespace megdnn { namespace naive { @@ -17,10 +18,10 @@ namespace naive { class ConvolutionForwardImpl: public ConvolutionForward { public: using ConvolutionForward::ConvolutionForward; - void exec(_megdnn_tensor_in src, - _megdnn_tensor_in filter, - _megdnn_tensor_out dst, - _megdnn_workspace workspace) override; + void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, + _megdnn_tensor_out dst, + const PreprocessedFilter* preprocessed_filter, + _megdnn_workspace workspace) override; std::vector get_all_algorithms(const TensorLayout &src, const TensorLayout &filter, const TensorLayout &dst) override; @@ -30,10 +31,29 @@ class ConvolutionForwardImpl: public ConvolutionForward { size_t workspace_limit_in_bytes, bool reproducible) override; size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, - const TensorLayout&) override { + const TensorLayout&, + const PreprocessedFilter*) override { + return 0; + } + + size_t get_preprocess_workspace_in_bytes(const TensorLayout&, + const TensorLayout&, + const TensorLayout&) override { return 0; } + void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, + const TensorLayout&, PreprocessedFilter*, + _megdnn_workspace) override { + megdnn_throw("convolution exec_preprocess in not impl yet"); + } + + SmallVector deduce_preprocessed_filter_layout( + const TensorLayout& , const TensorLayout& , + const TensorLayout& )override{ + return {}; + } + const char* get_algorithm_set_name() const override; }; diff --git a/dnn/src/naive/convpooling/conv_pooling.cpp b/dnn/src/naive/convpooling/conv_pooling.cpp index 6ae7b14b..d91e52ef 100644 --- a/dnn/src/naive/convpooling/conv_pooling.cpp +++ b/dnn/src/naive/convpooling/conv_pooling.cpp @@ -97,7 +97,7 @@ void ConvPoolingForwardImpl::exec(const _megdnn_in TensorND src, TensorND conv_dst((float*)(workspace.raw_ptr), conv_dst_layout); //convFwd->check_layout(src.layout, filter.layout, workspace.layout, empty_wsp.layout); check_layout(src.layout, filter.layout, bias.layout, dst.layout, workspace.size); - convFwd->exec(src, filter, conv_dst, empty_wsp); + convFwd->exec(src, filter, conv_dst, nullptr, empty_wsp); // calculate bias int conv_dst_batch = conv_dst.layout.shape[0]; diff --git a/dnn/src/naive/mask_conv/opr_impl.cpp b/dnn/src/naive/mask_conv/opr_impl.cpp index b09e2db3..aa931f7a 100644 --- a/dnn/src/naive/mask_conv/opr_impl.cpp +++ b/dnn/src/naive/mask_conv/opr_impl.cpp @@ -80,7 +80,7 @@ void MaskConvForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_workspace workspace) { MEGDNN_MARK_USED_VAR(mask); m_conv_opr->param() = this->param(); - m_conv_opr->exec(src, filter, dst, workspace); + m_conv_opr->exec(src, filter, dst, nullptr, workspace); #define cb(DType) \ if (mask.layout.dtype == DType()) { \ using ctype = typename DTypeTrait::ctype; \ @@ -99,7 +99,7 @@ size_t MaskConvForwardImpl::get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) { MEGDNN_MARK_USED_VAR(mask); m_conv_opr->param() = this->param(); - return m_conv_opr->get_workspace_in_bytes(src, filter, dst); + return m_conv_opr->get_workspace_in_bytes(src, filter, dst, nullptr); } void MaskPropagateImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, diff --git a/dnn/src/naive/separable_conv/opr_impl.cpp b/dnn/src/naive/separable_conv/opr_impl.cpp index 652d2997..b2630eed 100644 --- a/dnn/src/naive/separable_conv/opr_impl.cpp +++ b/dnn/src/naive/separable_conv/opr_impl.cpp @@ -103,7 +103,7 @@ void SeparableConvForwardImpl::exec(_megdnn_tensor_in src, ConvolutionForwardImpl* convOptr = new ConvolutionForwardImpl(this->handle()); Workspace empty_wsp; - convOptr->exec(src, filter2d, dst, empty_wsp); + convOptr->exec(src, filter2d, dst, nullptr, empty_wsp); delete(convOptr); free(filter2d_buf); diff --git a/dnn/test/arm_common/conv_bias_multi_thread.cpp b/dnn/test/arm_common/conv_bias_multi_thread.cpp index b11d3df9..051b7f31 100644 --- a/dnn/test/arm_common/conv_bias_multi_thread.cpp +++ b/dnn/test/arm_common/conv_bias_multi_thread.cpp @@ -664,7 +664,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { conv_bias_opr->get_workspace_in_bytes( tensors[0].layout, filter_transform_layout, tensors[2].layout, tensors[3].layout, - tensors[4].layout); + tensors[4].layout, nullptr); WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), conv_bias_workspace_in_bytes, @@ -676,7 +676,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, wb.get_workspace(2)); conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], - tensors[3], tensors[4], wb.get_workspace(1)); + tensors[3], tensors[4], nullptr, + wb.get_workspace(1)); free(wb.ptr()); }; diff --git a/dnn/test/common/conv_bias.cpp b/dnn/test/common/conv_bias.cpp index e4b87c60..22fdf90a 100644 --- a/dnn/test/common/conv_bias.cpp +++ b/dnn/test/common/conv_bias.cpp @@ -1008,7 +1008,7 @@ void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m, conv_bias_opr->param().output_block_size = m; size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes( tensors[0].layout, filter_transform_layout, tensors[2].layout, - tensors[3].layout, tensors[4].layout); + tensors[3].layout, tensors[4].layout, nullptr); WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), conv_bias_workspace_in_bytes, @@ -1020,7 +1020,7 @@ void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m, winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, wb.get_workspace(2)); conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], - tensors[3], tensors[4], wb.get_workspace(1)); + tensors[3], tensors[4], nullptr, wb.get_workspace(1)); free(wb.ptr()); }; diff --git a/dnn/test/common/opr_proxy.h b/dnn/test/common/opr_proxy.h index cf287f92..121380c6 100644 --- a/dnn/test/common/opr_proxy.h +++ b/dnn/test/common/opr_proxy.h @@ -200,15 +200,70 @@ struct OprProxyProfilingTernary : public OprProxyProfilingBase { using OprProxyProfilingTernary::OprProxyProfilingTernary; \ } -DEF_PROF3(ConvolutionForward); DEF_PROF3(ConvolutionBackwardData); DEF_PROF3(ConvolutionBackwardFilter); DEF_PROF3(LocalShareForward); DEF_PROF3(LocalShareBackwardData); DEF_PROF3(LocalShareBackwardFilter); - #undef DEF_PROF3 +//! TODO: it should adapt weight preprocess later +template <> +struct OprProxy + : public OprProxyProfilingTernary { + using OprProxyProfilingTernary::OprProxyProfilingTernary; + void exec(ConvolutionForward* opr, const TensorNDArray& tensors) { + megdnn_assert(tensors.size() == 3); + if (!Base::W.valid()) { + Base::W = WorkspaceWrapper(opr->handle(), 0); + } + if (Base::m_profiling && !Base::target_algo) { + size_t min_time = std::numeric_limits::max(); + for (auto algo : + opr->get_all_algorithms(tensors[0].layout, tensors[1].layout, + tensors[2].layout)) { + opr->execution_policy().algorithm = algo; + auto workspace_size = opr->get_workspace_in_bytes( + tensors[0].layout, tensors[1].layout, tensors[2].layout, + nullptr); + Base::W.update(workspace_size); + + for (size_t times = 0; times < Base::warmup_times; ++times) + opr->exec(tensors[0], tensors[1], tensors[2], nullptr, + Base::W.workspace()); + megcoreSynchronize(opr->handle()->megcore_computing_handle()); + Timer timer; + timer.start(); + for (size_t times = 0; times < Base::exec_times; ++times) { + opr->exec(tensors[0], tensors[1], tensors[2], nullptr, + Base::W.workspace()); + } + megcoreSynchronize(opr->handle()->megcore_computing_handle()); + timer.stop(); + printf("%.3fms %s\n", timer.get_time_in_us() / 1e3, + algo->name()); + if (min_time > timer.get_time_in_us()) { + min_time = timer.get_time_in_us(); + Base::target_algo = algo; + } + } + opr->execution_policy().algorithm = Base::target_algo; + auto workspace_size = opr->get_workspace_in_bytes( + tensors[0].layout, tensors[1].layout, tensors[2].layout, nullptr); + Base::W.update(workspace_size); + } + if (!Base::target_algo) { + auto workspace_size = opr->get_workspace_in_bytes( + tensors[0].layout, tensors[1].layout, tensors[2].layout, + nullptr); + Base::W.update(workspace_size); + } + opr->exec(tensors[0], tensors[1], tensors[2], nullptr, + Base::W.workspace()); + } +}; + + template struct OprProxyProfiling5 : public OprProxyProfilingBase { using Base = OprProxyProfilingBase; @@ -274,10 +329,67 @@ struct OprProxyProfiling5 : public OprProxyProfilingBase { DEF_PROF5(DeformableConvForward); DEF_PROF5(DeformableConvBackwardFilter); -DEF_PROF5(ConvBiasForward); +//DEF_PROF5(ConvBiasForward); DEF_PROF5(BatchConvBiasForward); #undef DEF_PROF5 +//! TODO: it should adapt weight preprocess later +template <> +struct OprProxy : public OprProxyProfiling5 { + using OprProxyProfiling5::OprProxyProfiling5; + void exec(ConvBiasForward* opr, const TensorNDArray& tensors) { + megdnn_assert(tensors.size() == 5); + if (!Base::W.valid()) { + Base::W = WorkspaceWrapper(opr->handle(), 0); + } + if (Base::m_profiling && !Base::target_algo) { + size_t min_time = std::numeric_limits::max(); + for (auto algo : + opr->get_all_algorithms(tensors[0].layout, tensors[1].layout, + tensors[2].layout, tensors[3].layout, + tensors[4].layout)) { + opr->execution_policy().algorithm = algo; + auto workspace_size = opr->get_workspace_in_bytes( + tensors[0].layout, tensors[1].layout, tensors[2].layout, + tensors[3].layout, tensors[4].layout, nullptr); + Base::W.update(workspace_size); + + for (size_t times = 0; times < Base::warmup_times; ++times) + opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], + tensors[4], nullptr, Base::W.workspace()); + megcoreSynchronize(opr->handle()->megcore_computing_handle()); + Timer timer; + timer.start(); + for (size_t times = 0; times < Base::exec_times; ++times) { + opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], + tensors[4], nullptr, Base::W.workspace()); + } + megcoreSynchronize(opr->handle()->megcore_computing_handle()); + timer.stop(); + printf("%.3fms %s\n", timer.get_time_in_us() / 1e3, + algo->name()); + if (min_time > timer.get_time_in_us()) { + min_time = timer.get_time_in_us(); + Base::target_algo = algo; + } + } + opr->execution_policy().algorithm = Base::target_algo; + auto workspace_size = opr->get_workspace_in_bytes( + tensors[0].layout, tensors[1].layout, tensors[2].layout, + tensors[3].layout, tensors[4].layout, nullptr); + Base::W.update(workspace_size); + } + if (!Base::target_algo) { + auto workspace_size = opr->get_workspace_in_bytes( + tensors[0].layout, tensors[1].layout, tensors[2].layout, + tensors[3].layout, tensors[4].layout, nullptr); + Base::W.update(workspace_size); + } + opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4], + nullptr, Base::W.workspace()); + } +}; + template struct OprProxyProfiling8 : public OprProxyProfilingBase { using Base = OprProxyProfilingBase; diff --git a/dnn/test/cpu/mask_conv.cpp b/dnn/test/cpu/mask_conv.cpp index 3031da13..16e07c98 100644 --- a/dnn/test/cpu/mask_conv.cpp +++ b/dnn/test/cpu/mask_conv.cpp @@ -75,8 +75,8 @@ TEST_F(CPU, MASK_PROPAGATE) { auto dst = TensorND{dst_ptr, dst_layout}; WorkspaceWrapper workspace{ handle(), opr->get_workspace_in_bytes(src.layout, filter.layout, - dst.layout)}; - opr->exec(src, filter, dst, workspace.workspace()); + dst.layout, nullptr)}; + opr->exec(src, filter, dst, nullptr, workspace.workspace()); for (size_t i = 0; i < dst.layout.total_nr_elems(); ++i) { mask_dst.ptr()[i] = dst_ptr[i] > 0; } diff --git a/dnn/test/cuda/chanwise_convolution.cpp b/dnn/test/cuda/chanwise_convolution.cpp index 4595fe64..c29db534 100644 --- a/dnn/test/cuda/chanwise_convolution.cpp +++ b/dnn/test/cuda/chanwise_convolution.cpp @@ -176,6 +176,46 @@ public: } } + //! special for weight preprocess + void exec_convolution(ConvolutionForward* opr0, ConvolutionForward* opr1) { + opr0->param().pad_h = pad_h; + opr0->param().pad_w = pad_w; + opr1->param() = opr0->param(); + opr1->param().sparse = param::Convolution::Sparse::GROUP; + + TensorND a0, b0, c0, a1, b1, c1; + std::tie(a0, b0, c0) = shuffle(std::make_tuple( + src0->tensornd(), flt0->tensornd(), dst0->tensornd())); + std::tie(a1, b1, c1) = shuffle(std::make_tuple( + src1->tensornd(), flt1->tensornd(), dst1->tensornd())); + WorkspaceWrapper wk( + handle, + std::max(opr0->get_workspace_in_bytes(a0.layout, b0.layout, + c0.layout, nullptr), + opr1->get_workspace_in_bytes(a1.layout, b1.layout, + c1.layout, nullptr))); + cudaProfilerStart(); + cudaEventRecord(cuda_ev[0], cuda_stream); + opr0->exec(a0, b0, c0, nullptr, wk.workspace()); + cudaEventRecord(cuda_ev[1], cuda_stream); + opr1->exec(a1, b1, c1, nullptr, wk.workspace()); + cudaEventRecord(cuda_ev[2], cuda_stream); + cudaProfilerStop(); + + if (getenv("MEGDNN_CHANWISE_CONV_VERBOSE") || + getenv("MEGDNN_CHANWISE_CONV_FULLBENCH")) { + cudaStreamSynchronize(cuda_stream); + float t0 = -1, t1 = -1; + cudaEventElapsedTime(&t0, cuda_ev[0], cuda_ev[1]); + cudaEventElapsedTime(&t1, cuda_ev[1], cuda_ev[2]); + printf("%s;%s;%s: cudnn/megdnn: %.3fms/%.3fms=%.3f\n", + lsrc.TensorShape::to_string().c_str(), + lflt1.TensorShape::to_string().c_str(), + ldst.TensorShape::to_string().c_str(), + t0, t1, t0 / t1); + } + } + void cmp_dst() { Tensor<> dst0_cpu(handle_cpu, ldst), dst1_cpu(handle_cpu, ldst); megdnn_memcpy_D2H(handle, @@ -399,7 +439,7 @@ TEST_F(CUDA, CHANWISE_CONVOLUTION_FORWARD_BENCH_CHECK) { benv.alloc(N, IC, IH, IW, CHL_MUL, FH, FW, PH, PW); benv.fill_src(); benv.fill_flt(); - benv.exec(conv0.get(), conv1.get()); + benv.exec_convolution(conv0.get(), conv1.get()); benv.cmp_dst(); }; diff --git a/dnn/test/dispatcher/null.cpp b/dnn/test/dispatcher/null.cpp index 7aeef558..6ab132c2 100644 --- a/dnn/test/dispatcher/null.cpp +++ b/dnn/test/dispatcher/null.cpp @@ -30,10 +30,10 @@ TEST(DISPATCHER, NULL_DISPATCHER) auto layout = TensorLayout({1, 1, 1, 1}, dtype::Float32()); TensorND src(nullptr, layout), filter(nullptr, layout), dst(nullptr, layout); - auto wsize = opr->get_workspace_in_bytes(layout, layout, layout); + auto wsize = opr->get_workspace_in_bytes(layout, layout, layout, nullptr); Workspace workspace(nullptr, wsize); - opr->exec(src, filter, dst, workspace); + opr->exec(src, filter, dst, nullptr, workspace); } #endif diff --git a/dnn/test/naive/conv_bias.cpp b/dnn/test/naive/conv_bias.cpp index 3d2972b4..972c0126 100644 --- a/dnn/test/naive/conv_bias.cpp +++ b/dnn/test/naive/conv_bias.cpp @@ -217,11 +217,11 @@ TEST_F(NAIVE, CONV_BIAS_QUANTIZED8x8x32_NCHW32) { size_t ws_size = conv_opr->get_workspace_in_bytes( src_layout_4, filter_layout_4, bias_layout_4, z_layout_4, - dst_layout_4); + dst_layout_4, nullptr); WorkspaceWrapper ws{handle(), ws_size}; conv_opr->exec(src_ts_4.tensornd(), filter_ts_4.tensornd(), bias_ts_4.tensornd(), z_ts_4.tensornd(), dst_ts_4.tensornd(), - ws.workspace()); + nullptr, ws.workspace()); TensorLayout src_layout_32{{N, IC / 32, IH, IW, 32}, dtype::QuantizedS8(0.1f)}; diff --git a/dnn/test/naive/convolution.cpp b/dnn/test/naive/convolution.cpp index c6b90781..761cd922 100644 --- a/dnn/test/naive/convolution.cpp +++ b/dnn/test/naive/convolution.cpp @@ -209,7 +209,8 @@ TEST_F(NAIVE, CONVOLUTION_WITH_NCHW4) { } auto workspace_size = conv->get_workspace_in_bytes( - tensors[0].layout, tensors[1].layout, tensors[2].layout); + tensors[0].layout, tensors[1].layout, tensors[2].layout, + nullptr); dt_byte* workspace_ptr = static_cast(malloc(workspace_size)); Workspace workspace{workspace_ptr, workspace_size}; @@ -217,7 +218,7 @@ TEST_F(NAIVE, CONVOLUTION_WITH_NCHW4) { relayout->exec(nchw4_tensors[0], nchw_tensors[0]); relayout->exec(nchw4_tensors[1], nchw_tensors[1]); - conv->exec(nchw_tensors[0], nchw_tensors[1], nchw_tensors[2], + conv->exec(nchw_tensors[0], nchw_tensors[1], nchw_tensors[2], nullptr, workspace); relayout->exec(nchw_tensors[2], nchw4_tensors[2]); diff --git a/dnn/test/x86/conv_bias.cpp b/dnn/test/x86/conv_bias.cpp index a0a06226..ef442f50 100644 --- a/dnn/test/x86/conv_bias.cpp +++ b/dnn/test/x86/conv_bias.cpp @@ -1334,8 +1334,8 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes( tensors[0].layout, filter_transform_layout, - tensors[2].layout, tensors[3].layout, - tensors[4].layout); + tensors[2].layout, tensors[3].layout, tensors[4].layout, + nullptr); WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), conv_bias_workspace_in_bytes, @@ -1347,7 +1347,8 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, wb.get_workspace(2)); conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], - tensors[3], tensors[4], wb.get_workspace(1)); + tensors[3], tensors[4], nullptr, + wb.get_workspace(1)); free(wb.ptr()); }; -- GitLab