fix(dnn/reduce): fix incorrect reduce_mean o16c32 result for large tensors

GitOrigin-RevId: ebf03d814a893efca9dd9e09bb58001c22093fd4

fix(dnn/reduce): fix incorrect reduce_mean o16c32 result for large tensors
GitOrigin-RevId: ebf03d814a893efca9dd9e09bb58001c22093fd4
17371e79 · Megvii Engine Team · 884a07ff · 17371e79 · 17371e79 · 17371e79
4 changed files
--- a/dnn/src/common/reduce_helper.h
+++ b/dnn/src/common/reduce_helper.h
@@ -48,10 +48,10 @@ struct MeanOp {
    src_ctype* src;
    dst_ctype* dst;
    const size_t B;
-
+    
    MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx]; }
    MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) {
-        dst[idx] = val / static_cast<dst_ctype>(B);
+        dst[idx] = val / static_cast<wtype>(B);
    }
    static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) {
        return lhs + rhs;

--- a/dnn/test/cuda/reduce.cpp
+++ b/dnn/test/cuda/reduce.cpp
@@ -103,6 +103,16 @@ TEST_F(CUDA, REDUCE) {
                .set_param(param)
                .execs({{1, 4194304, 1}, {1, 1, 1}});
    }
+
+    {
+        // large reduce_mean for O16C32
+        Reduce::Param param{Mode::MEAN, 1,
+                            Reduce::Param::DataType::FLOAT_O16xC32};
+        checker.set_dtype(0, dtype::Float16())
+                .set_dtype(1, dtype::Float16())
+                .set_param(param)
+                .execs({{1, 65536, 5}, {1, 1, 5}});
+    }
 }

 // vim: syntax=cpp.doxygen
--- a/dnn/test/fallback/reduce.cpp
+++ b/dnn/test/fallback/reduce.cpp
@@ -74,6 +74,15 @@ TEST_F(FALLBACK, REDUCE) {
                Config config(param, dtype, shape);
                configs.push_back(config);
            }
+    
+    {
+        // large reduce_mean for O16C32
+        TensorShape shape{1, 65536, 5};
+        Param param(Mode::MEAN, 1, DataType::FLOAT_O16xC32);
+        Config config(param, dtype::Float16(), shape);
+        configs.push_back(config);
+    }
+    
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;

--- a/dnn/test/rocm/reduce.cpp
+++ b/dnn/test/rocm/reduce.cpp
@@ -103,6 +103,16 @@ TEST_F(ROCM, REDUCE) {
                .set_param(param)
                .execs({{1, 4194304, 1}, {1, 1, 1}});
    }
+
+    {
+        // large reduce_mean for O16C32
+        Reduce::Param param{Mode::MEAN, 1,
+                            Reduce::Param::DataType::FLOAT_O16xC32};
+        checker.set_dtype(0, dtype::Float16())
+                .set_dtype(1, dtype::Float16())
+                .set_param(param)
+                .execs({{1, 65536, 5}, {1, 1, 5}});
+    }
 #endif
 }