Commit 230ab45a authored by Megvii Engine Team, committed by Xinran Xu

fix(mgb/naive): fix naive convolution not dispatching its kernel through the handle

GitOrigin-RevId: 4038fe23a452f8455a2f0d7cf91e173e5769d77c
Parent 22853fa2
@@ -13,6 +13,7 @@
 #include "src/common/conv_bias.h"
 #include "megdnn/oprs/nn.h"
 #include "src/common/utils.h"
+#include "src/common/opr_delegate.h"
 
 namespace megdnn {
@@ -445,13 +446,13 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args,
 //! Only used for naive implementation. DO NOT use the following function in
 //! other backends.
-void handle_z_inp_and_activation(Handle* handle,
-                                 param::ConvBias::NonlineMode nonline_mode,
-                                 const TensorND& conv_bias_tensor,
-                                 const TensorND& z_tensor,
-                                 const TensorND& dst_tensor,
-                                 dt_byte* workspace_ptr) {
+void handle_z_inp_and_activation_naive(
+        param::ConvBias::NonlineMode nonline_mode,
+        const TensorND& conv_bias_tensor, const TensorND& z_tensor,
+        const TensorND& dst_tensor, dt_byte* workspace_ptr) {
     auto res = dst_tensor, z_float = z_tensor;
+    //! create naive inplace handle
+    auto handle = inplace_cpu_handle(2);
     if (z_tensor.layout.ndim > 0 &&
         z_tensor.layout.dtype.category() != DTypeCategory::FLOAT) {
         dt_byte *res_float_workspace_ptr = nullptr,
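Note on the signature change above: the helper no longer receives the caller's Handle; it builds a private naive CPU handle via inplace_cpu_handle(2), so the elementwise work it performs internally is decoupled from the handle whose dispatcher the caller controls. A minimal sketch of how the helper can then run, say, the z-branch addition on that private handle (illustrative only; the ADD opr and the exact tensors are assumptions, not lines from this commit):

    // Build an elementwise ADD on the helper's private naive handle and
    // compute dst = conv_bias + z. Nothing here touches the caller's
    // handle, which stays free to dispatch the whole helper as one kernel.
    auto add_opr = handle->create_operator<Elemwise>();
    add_opr->param().mode = Elemwise::Param::Mode::ADD;
    add_opr->exec({conv_bias_tensor, z_tensor}, dst_tensor);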
......
@@ -18,16 +18,14 @@
 namespace megdnn {
 
 void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args,
-                               const TensorND* conv_dst_tensor,
-                               const TensorND* dst_tensor,
-                               const TensorND* bias_tensor);
+        const TensorND* conv_dst_tensor,
+        const TensorND* dst_tensor,
+        const TensorND* bias_tensor);
 
-void handle_z_inp_and_activation(Handle* handle,
-                                 param::ConvBias::NonlineMode nonline_mode,
-                                 const TensorND& conv_bias_tensor,
-                                 const TensorND& z_tensor,
-                                 const TensorND& dst_tensor,
-                                 dt_byte* workspace_ptr);
+void handle_z_inp_and_activation_naive(
+        param::ConvBias::NonlineMode nonline_mode,
+        const TensorND& conv_bias_tensor, const TensorND& z_tensor,
+        const TensorND& dst_tensor, dt_byte* workspace_ptr);
 
 } // namespace megdnn
......
@@ -13,8 +13,8 @@
 using namespace megdnn;
 
-const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle() {
-    auto make = []() {
+const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle(int debug_level) {
+    auto make = [](int deb_level) {
         megcoreDeviceHandle_t dev_handle;
         megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU);
         megcoreComputingHandle_t comp_handle;
@@ -23,12 +23,20 @@ const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle() {
             megcoreDestroyComputingHandle(comp_handle);
             megcoreDestroyDeviceHandle(dev_handle);
         };
-        std::shared_ptr<Handle> handle = Handle::make(comp_handle);
+        std::shared_ptr<Handle> handle = Handle::make(comp_handle, deb_level);
         handle->set_destructor(destructor);
         return handle;
     };
-    static std::shared_ptr<Handle> handle = make();
-    return handle;
+    if (debug_level == 0) {
+        static std::shared_ptr<Handle> handle = make(0);
+        return handle;
+    } else if (debug_level == 1) {
+        static std::shared_ptr<Handle> handle_fallback = make(1);
+        return handle_fallback;
+    } else {
+        static std::shared_ptr<Handle> handle_naive = make(2);
+        return handle_naive;
+    }
 }
 
 // vim: syntax=cpp.doxygen
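Usage sketch for the extended factory (the 0/1/2 convention is inferred from the singleton names above: 0 returns the default inplace CPU handle, 1 a fallback handle, 2 a naive handle; each level is a process-wide singleton, and Handle::make is assumed to pick the backend from its second argument):

    #include "src/common/opr_delegate.h"
    #include <cassert>

    // Repeated calls with the same debug level return the same handle.
    auto& naive_handle = megdnn::inplace_cpu_handle(2);
    auto& again = megdnn::inplace_cpu_handle(2);
    assert(naive_handle.get() == again.get());
    // Operators created on it run on the naive (reference) backend.
    auto relayout = naive_handle->create_operator<megdnn::Relayout>();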
@@ -24,7 +24,7 @@ namespace megdnn {
  * Usually used for calling other opr impls from some opr impl. You probably
  * want to use CpuOprDelegationStorage instead.
  */
-const std::shared_ptr<Handle>& inplace_cpu_handle();
+const std::shared_ptr<Handle>& inplace_cpu_handle(int debug_level = 0);
 
 /*!
  * \brief storage for oprs on inplace CPU handle
......
@@ -104,8 +104,9 @@ void BatchConvBiasForwardImpl::exec(_megdnn_tensor_in src,
     }
 #undef DISPATCH
 #undef DISPATCH_RAW
-    handle_z_inp_and_activation(handle(), param().nonlineMode, sfb, z, dst,
-                                reinterpret_cast<dt_byte*>(ws.get(1)));
+    MEGDNN_DISPATCH_CPU_KERN_OPR(handle_z_inp_and_activation_naive(
+            param().nonlineMode, sfb, z, dst,
+            reinterpret_cast<dt_byte*>(ws.get(1))));
 }
 
 std::vector<BatchConvBiasForward::Algorithm*>
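This call-site change (repeated for ConvBiasForwardImpl in the next hunk) is the heart of the fix: previously the z/activation epilogue ran inline on the calling thread, bypassing the handle's dispatcher, so comp nodes that record their kernel sequence never saw it. An illustrative expansion of what the dispatch macro accomplishes (assumption: the real MEGDNN_DISPATCH_CPU_KERN_OPR in the naive backend differs in detail):

    // Package the statement as a kernel and hand it to the handle's CPU
    // dispatcher instead of executing it inline; a recording comp node
    // can then capture it and replay it on later runs.
    auto kern = [=]() {
        handle_z_inp_and_activation_naive(param().nonlineMode, sfb, z, dst,
                                          reinterpret_cast<dt_byte*>(ws.get(1)));
    };
    static_cast<naive::HandleImpl*>(handle())->dispatch_kern(kern);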
std::vector<BatchConvBiasForward::Algorithm*>
......
@@ -137,8 +137,8 @@ void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
         }
 #undef DISPATCH
 #undef DISPATCH_RAW
-        handle_z_inp_and_activation(handle(), param().nonlineMode, sfb, z, dst,
-                                    workspace_ptr);
+        MEGDNN_DISPATCH_CPU_KERN_OPR(handle_z_inp_and_activation_naive(
+                param().nonlineMode, sfb, z, dst, workspace_ptr));
     }
     MIDOUT_END();
 }
......
@@ -11,6 +11,7 @@
 #include "megbrain/opr/io.h"
 #include "megbrain/opr/basic_arith_wrapper.h"
 #include "megbrain/opr/dnn/convolution.h"
+#include "megbrain/opr/utility.h"
 #include "megbrain/opr/blas.h"
 #include "megbrain/opr/tensor_manip.h"
@@ -22,6 +23,7 @@
 #include "megbrain/graph/execution_mask.h"
 #include "megbrain/utils/timer.h"
 #include "megbrain/comp_node_env.h"
+#include "megbrain/gopt/inference.h"
 
 #include "megbrain/test/helper.h"
@@ -1814,4 +1816,29 @@ TEST(TestGraph, OperatorNodeConfigInstanceID) {
     }
 }
 
+TEST(TestGraph, NaiveRecord2NCHW44) {
+    auto cn = CompNode::load("cpu0");
+    using ConvParam = megdnn::ConvBias::Param;
+    ConvParam param;
+    param.sparse = ConvParam::Sparse::DENSE;
+    param.format = ConvParam::Format::NCHW44;
+    HostTensorGenerator<> gen;
+    auto host_x = gen({1, 2, 12, 12, 4}, cn),
+         host_w = gen({2, 2, 3, 3, 4, 4}, cn),
+         host_b = gen({1, 2, 1, 1, 4}, cn);
+    HostTensorND host_z;
+    auto graph = ComputingGraph::make();
+    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
+         w = opr::Host2DeviceCopy::make(*graph, host_w),
+         b = opr::Host2DeviceCopy::make(*graph, host_b),
+         z = opr::ConvBiasForward::make(x, w, b, param, {});
+    graph->options().comp_node_seq_record_level = 2;
+    graph->options().var_sanity_check_first_run = false;
+    auto func = graph->compile({make_callback_copy(z, host_z)});
+    ComputingGraph::assert_destroy(graph);
+    host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
+    func->execute().wait();
+}
+
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
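Reading of the regression test (interpretation, not stated in the commit): comp_node_seq_record_level = 2 makes the compiled function record the dispatched kernel sequence on its first execute() and replay it afterwards, and ComputingGraph::assert_destroy verifies the graph itself can be released once recording is done. That replay only contains kernels that actually went through the dispatcher, which is exactly what the conv-bias epilogue above was missing before this fix; the final copy_from_fixlayout plus execute() exercises the replay path with fresh input data. var_sanity_check_first_run is disabled, presumably because the sanity checks would inject work that cannot be recorded.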