Commit 89186edc
Authored Nov 02, 2021 by Megvii Engine Team
fix(dnn): correct reduce/argmxx/fakequant calculation with nan
GitOrigin-RevId: 7e78bdae9106186c5d1a1b8ee2ab337ed69db21b
Parent: 68cdabd2
Showing 9 changed files with 219 additions and 14 deletions (+219 -14)
dnn/src/common/argmxx_helper.h                                +66  -0
dnn/src/common/reduce_helper.h                                +44  -0
dnn/src/cuda/fake_quant/kern.cuh                              +10  -2
dnn/src/naive/argmxx/opr_impl.cpp                              +6  -2
dnn/src/naive/reduce/opr_impl.cpp                             +18  -8
dnn/test/cuda/fake_quant.cpp                                  +51  -1
dnn/test/cuda/reduce.cpp                                      +14  -0
imperative/python/test/unit/functional/test_math.py            +5  -1
imperative/python/test/unit/quantization/test_fake_quant.py    +5  -0
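The three operator families are all fixed with the same convention: a NaN that takes part in a min/max-style comparison must propagate to the result instead of being silently dropped, since every IEEE-754 comparison involving NaN evaluates to false. A minimal standalone sketch of the combine rule the patch adopts (illustrative code, not the MegDNN API):

#include <cassert>
#include <cmath>
#include <limits>

// NaN-propagating min, mirroring the patched MinOp<..., dt_float32>::apply.
static float min_combine(float lhs, float rhs) {
    return (std::isnan(lhs) || lhs < rhs) ? lhs : rhs;
}

int main() {
    const float nan = std::numeric_limits<float>::quiet_NaN();
    assert(min_combine(1.f, 2.f) == 1.f);
    assert(std::isnan(min_combine(nan, 2.f)));  // NaN lhs short-circuits
    assert(std::isnan(min_combine(1.f, nan)));  // lhs < NaN is false, so the NaN rhs is returned
    return 0;
}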
dnn/src/common/argmxx_helper.h
@@ -78,6 +78,72 @@ struct ArgmxxOp {
     const wtype INIT;
 };
 
+template <bool is_max>
+struct ArgmxxOp<dt_float32, is_max> {
+    using stype_ = dt_float32;
+    struct wtype {
+        stype_ key;
+        dt_int32 val;
+        MEGDNN_HOST MEGDNN_DEVICE wtype() {}
+        MEGDNN_HOST MEGDNN_DEVICE wtype(stype_ key, dt_int32 val) : key(key), val(val) {}
+        MEGDNN_HOST MEGDNN_DEVICE wtype(wtype& rhs) : key(rhs.key), val(rhs.val) {}
+        MEGDNN_HOST MEGDNN_DEVICE wtype(volatile wtype& rhs) : key(rhs.key), val(rhs.val) {}
+        MEGDNN_HOST MEGDNN_DEVICE wtype(const wtype& rhs) : key(rhs.key), val(rhs.val) {}
+        MEGDNN_HOST MEGDNN_DEVICE wtype(const volatile wtype& rhs)
+                : key(rhs.key), val(rhs.val) {}
+        MEGDNN_HOST MEGDNN_DEVICE volatile wtype& operator=(const wtype& rhs) volatile {
+            this->key = rhs.key;
+            this->val = rhs.val;
+            return *this;
+        }
+    };
+    MEGDNN_HOST MEGDNN_DEVICE ArgmxxOp(
+            stype_* src, dt_int32* dst, uint32_t A, uint32_t B, uint32_t C)
+            : src(src),
+              dst(dst),
+              A(A),
+              B(B),
+              C(C),
+              INIT(wtype(
+                      is_max ? DTypeTrait<stype_>::min() : DTypeTrait<stype_>::max(),
+                      0)) {}
+    MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) {
+        wtype res;
+        res.key = src[idx];
+        res.val = idx / C % B;
+        return res;
+    }
+    MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) {
+        dst[idx] = val.val;
+    }
+    static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) {
+#if defined(__CUDA_ARCH__)
+        if (isnan(lhs.key))
+#else
+        if (std::isnan(lhs.key))
+#endif
+            return lhs;
+        if (is_max) {
+            if (lhs.key > rhs.key)
+                return lhs;
+            else
+                return rhs;
+        } else {
+            if (lhs.key < rhs.key)
+                return lhs;
+            else
+                return rhs;
+        }
+    }
+    stype_* src;
+    dt_int32* dst;
+    uint32_t A, B, C;
+    const wtype INIT;
+};
+
 }  // namespace argmxx
 }  // namespace megdnn
 
 // vim: syntax=cpp.doxygen
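The specialization reduces (key, index) pairs, so once a NaN key is present, the reported argmax/argmin index is the NaN's position. A hedged sequential sketch of the same combine rule (the kv struct and the scan loop are illustrative, not MegDNN code):

#include <cassert>
#include <cmath>
#include <limits>

// (key, index) pair, standing in for ArgmxxOp<dt_float32, true>::wtype.
struct kv {
    float key;
    int val;  // index of key in the source array
};

static kv argmax_combine(kv lhs, kv rhs) {
    if (std::isnan(lhs.key))
        return lhs;  // a NaN already in the accumulator sticks
    return lhs.key > rhs.key ? lhs : rhs;  // a NaN rhs also wins: lhs.key > NaN is false
}

int main() {
    const float nan = std::numeric_limits<float>::quiet_NaN();
    const float src[] = {1.f, 8.f, nan, 4.f};
    kv best{src[0], 0};
    for (int i = 1; i < 4; ++i)
        best = argmax_combine(best, kv{src[i], i});
    assert(best.val == 2 && std::isnan(best.key));  // the NaN's index is reported
    return 0;
}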
dnn/src/common/reduce_helper.h
@@ -119,6 +119,28 @@ struct MinOp {
             : INIT(wtype(DTypeTrait<wtype>::max())), src(src), dst(dst), B(B) {}
 };
 
+template <typename src_ctype, typename dst_ctype>
+struct MinOp<src_ctype, dst_ctype, dt_float32> {
+    typedef dt_float32 wtype;
+
+    const wtype INIT;
+
+    src_ctype* src;
+    dst_ctype* dst;
+    const size_t B;
+
+    MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx]; }
+    MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; }
+    static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) {
+#if defined(__CUDA_ARCH__)
+        return (isnan(lhs) || lhs < rhs) ? lhs : rhs;
+#else
+        return (std::isnan(lhs) || lhs < rhs) ? lhs : rhs;
+#endif
+    }
+    MEGDNN_HOST MEGDNN_DEVICE MinOp(src_ctype* src, dst_ctype* dst, size_t B)
+            : INIT(wtype(DTypeTrait<wtype>::max())), src(src), dst(dst), B(B) {}
+};
+
 template <typename src_ctype, typename dst_ctype, typename wtype_>
 struct MaxOp {
     typedef wtype_ wtype;
@@ -141,6 +163,28 @@ struct MaxOp {
             : INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
 };
 
+template <typename src_ctype, typename dst_ctype>
+struct MaxOp<src_ctype, dst_ctype, dt_float32> {
+    typedef dt_float32 wtype;
+
+    const wtype INIT;
+
+    src_ctype* src;
+    dst_ctype* dst;
+    const size_t B;
+
+    MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx]; }
+    MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; }
+    static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) {
+#if defined(__CUDA_ARCH__)
+        return (isnan(lhs) || lhs > rhs) ? lhs : rhs;
+#else
+        return (std::isnan(lhs) || lhs > rhs) ? lhs : rhs;
+#endif
+    }
+    MEGDNN_HOST MEGDNN_DEVICE MaxOp(src_ctype* src, dst_ctype* dst, size_t B)
+            : INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
+};
+
 template <typename src_ctype, typename dst_ctype, typename wtype_>
 struct CheckNonFiniteOp {
     typedef wtype_ wtype;
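These specializations feed the device-side reduction, which typically combines elements in tree order rather than in one long sequential fold; a useful property of the rule above is that a NaN survives every combine it takes part in, so the answer does not depend on reduction order. A small sketch under that assumption:

#include <cassert>
#include <cmath>

// Same rule as the patched MaxOp<..., dt_float32>::apply, host-side.
static float max_combine(float lhs, float rhs) {
    return (std::isnan(lhs) || lhs > rhs) ? lhs : rhs;
}

int main() {
    const float nan = std::nanf("");
    const float a[] = {0.5f, nan, -1.f, 2.f};
    // Sequential fold: the NaN survives to the end.
    float acc = a[0];
    for (int i = 1; i < 4; ++i)
        acc = max_combine(acc, a[i]);
    assert(std::isnan(acc));
    // Pairwise (tree) order gives the same result.
    assert(std::isnan(max_combine(max_combine(a[0], a[1]), max_combine(a[2], a[3]))));
    return 0;
}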
dnn/src/cuda/fake_quant/kern.cuh
@@ -30,6 +30,10 @@ struct FakeQuantKernOp {
     __device__ void operator()(uint32_t idx, ctype scale, ctype zero_point) {
         ctype x = round(input[idx] / scale) + zero_point;
+        if (isnan(x)) {
+            output[idx] = NAN;
+            return;
+        }
         x = fmaxf(fminf(x, qmax), qmin);
         output[idx] = (x - zero_point) * scale;
     }
@@ -54,7 +58,7 @@ struct FakeQuantBwdKernOp {
     __device__ void operator()(uint32_t idx, ctype scale, ctype zero_point) {
         ctype x = round(input[idx] / scale) + zero_point;
-        grad[idx] = x <= qmax && x >= qmin ? diff[idx] : 0.0;
+        grad[idx] = isnan(x) ? NAN : x <= qmax && x >= qmin ? diff[idx] : 0.0;
     }
 
 #if MEGDNN_CC_HOST
@@ -77,6 +81,10 @@ struct FakeQuantKernOpNonContig {
     __device__ void operator()(
             uint32_t, ctype& output, ctype input, ctype scale, ctype zero_point) {
         ctype x = round(input / scale) + zero_point;
+        if (isnan(x)) {
+            output = NAN;
+            return;
+        }
         x = fmaxf(fminf(x, qmax), qmin);
         output = (x - zero_point) * scale;
     }
@@ -96,7 +104,7 @@ struct FakeQuantBwdKernOpNonContig {
             uint32_t, ctype& grad, ctype diff, ctype input, ctype scale,
             ctype zero_point) {
         ctype x = round(input / scale) + zero_point;
-        grad = x <= qmax && x >= qmin ? diff : 0.0;
+        grad = isnan(x) ? NAN : x <= qmax && x >= qmin ? diff : 0.0;
     }
 
 #if MEGDNN_CC_HOST
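The early return matters because NaN would not survive the clamp on its own: fminf and fmaxf return the non-NaN operand, so round(NaN / scale) + zero_point would be clamped to a finite qmax and the NaN would silently disappear. A host-side sketch of the patched forward path (the qmin/qmax/scale/zero_point values here are illustrative, not taken from a real QParams):

#include <cassert>
#include <cmath>

static float fake_quant_fwd(float input, float scale, float zero_point, float qmin, float qmax) {
    float x = std::round(input / scale) + zero_point;
    if (std::isnan(x))
        return NAN;  // without this branch, the clamp below turns NaN into qmax
    x = std::fmax(std::fmin(x, qmax), qmin);
    return (x - zero_point) * scale;
}

int main() {
    assert(std::isnan(fake_quant_fwd(NAN, 4.f, 1.f, -128.f, 127.f)));
    assert(fake_quant_fwd(8.f, 4.f, 1.f, -128.f, 127.f) == 8.f);  // round(2) + 1 = 3 -> (3 - 1) * 4
    return 0;
}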
dnn/src/naive/argmxx/opr_impl.cpp
@@ -26,14 +26,18 @@ struct traits;
 template <>
 struct traits<true> {
     static const float init;
-    static bool better_than(float lhs, float rhs) { return lhs > rhs; }
+    static bool better_than(float lhs, float rhs) {
+        return std::isnan(lhs) ? true : lhs > rhs;
+    }
 };
 
 const float traits<true>::init = std::numeric_limits<float>::lowest();
 
 template <>
 struct traits<false> {
     static const float init;
-    static float better_than(float lhs, float rhs) { return lhs < rhs; }
+    static float better_than(float lhs, float rhs) {
+        return std::isnan(lhs) ? true : lhs < rhs;
+    }
 };
 
 const float traits<false>::init = std::numeric_limits<float>::max();
dnn/src/naive/reduce/opr_impl.cpp
@@ -73,25 +73,35 @@ const ctype Trait<Mode::PRODUCT, ctype>::INIT = ctype(1);
 template <typename ctype>
 struct Trait<Mode::MIN, ctype> {
-    static const ctype INIT;
-
     static ctype apply(ctype x, ctype y) { return x < y ? x : y; }
     static ctype visit(ctype x) { return x; }
     static ctype write(ctype x, size_t) { return x; }
 };
-template <typename ctype>
-const ctype Trait<Mode::MIN, ctype>::INIT = DTypeTrait<ctype>::max();
+template <>
+struct Trait<Mode::MIN, dt_float32> {
+    using ctype = dt_float32;
+
+    static ctype apply(ctype x, ctype y) { return (std::isnan(x) || x < y) ? x : y; }
+    static ctype visit(ctype x) { return x; }
+    static ctype write(ctype x, size_t) { return x; }
+};
 
 template <typename ctype>
 struct Trait<Mode::MAX, ctype> {
-    static const ctype INIT;
-
     static ctype apply(ctype x, ctype y) { return x > y ? x : y; }
     static ctype visit(ctype x) { return x; }
     static ctype write(ctype x, size_t) { return x; }
 };
-template <typename ctype>
-const ctype Trait<Mode::MAX, ctype>::INIT = DTypeTrait<ctype>::min();
+template <>
+struct Trait<Mode::MAX, dt_float32> {
+    using ctype = dt_float32;
+
+    static ctype apply(ctype x, ctype y) { return (std::isnan(x) || x > y) ? x : y; }
+    static ctype visit(ctype x) { return x; }
+    static ctype write(ctype x, size_t) { return x; }
+};
 
 template <Mode mode, typename ctype>
 void reduce_fwd(
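The generic Trait kept for the other dtypes shows exactly why the specialization is needed: with the plain comparison, once the running value is NaN, x < y is false and the next element simply replaces it. A two-assert demonstration contrasting the two rules:

#include <cassert>
#include <cmath>

// The unspecialized rule, as in the generic Trait<Mode::MIN, ctype> above.
static float min_generic(float lhs, float rhs) {
    return lhs < rhs ? lhs : rhs;
}

// The dt_float32 rule added by this commit.
static float min_nan_aware(float lhs, float rhs) {
    return (std::isnan(lhs) || lhs < rhs) ? lhs : rhs;
}

int main() {
    const float nan = std::nanf("");
    assert(min_generic(nan, 2.f) == 2.f);         // the NaN accumulator is silently dropped
    assert(std::isnan(min_nan_aware(nan, 2.f)));  // the fix keeps it
    return 0;
}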
dnn/test/cuda/fake_quant.cpp
@@ -21,7 +21,9 @@ using namespace fake_quant;
 TEST_F(CUDA, FAKE_QUANT) {
     std::vector<TestArg> args = get_args();
     auto dtype = dtype::Float32();
-    std::unique_ptr<RNG> rng;
+    UniformFloatRNG rng(-1.0f, 1.0f);
+    const auto nan = std::numeric_limits<float>::quiet_NaN();
+    UniformFloatWithValueRNG rng1 = UniformFloatWithValueRNG(-1.0f, 1.0f, 0.5f, nan);
     for (auto&& arg : args) {
         auto param = arg.param;
@@ -35,6 +37,17 @@ TEST_F(CUDA, FAKE_QUANT) {
                 .set_dtype(2, dtype)
                 .set_dtype(3, dtype)
                 .execs(TensorShapeArray{ishape, scale_shape, zeropoint_shape, ishape});
+        checker.set_allow_invalid_check(true);
+        checker.set_rng(0, &rng1);
+        checker.set_param(param)
+                .set_dtype(0, dtype)
+                .set_dtype(1, dtype)
+                .set_dtype(2, dtype)
+                .set_dtype(3, dtype)
+                .execs(TensorShapeArray{ishape, scale_shape, zeropoint_shape, ishape});
+        checker.set_rng(0, &rng);
+        checker.set_allow_invalid_check(false);
     }
     // test noncontiguous layout
     for (auto&& arg : args) {
@@ -53,12 +66,25 @@ TEST_F(CUDA, FAKE_QUANT) {
                  {scale_shape, dtype::Float32()},
                  {zeropoint_shape, dtype::Float32()},
                  ilayout});
+        checker.set_allow_invalid_check(true);
+        checker.set_rng(0, &rng1);
+        checker.set_param(param).execl(
+                {ilayout,
+                 {scale_shape, dtype::Float32()},
+                 {zeropoint_shape, dtype::Float32()},
+                 ilayout});
+        checker.set_rng(0, &rng);
+        checker.set_allow_invalid_check(false);
     }
 }
 
 TEST_F(CUDA, FAKE_QUANT_BACKWARD) {
     std::vector<TestArg> args = get_args();
     auto dtype = dtype::Float32();
+    UniformFloatRNG rng(-1.0f, 1.0f);
+    const auto nan = std::numeric_limits<float>::quiet_NaN();
+    UniformFloatWithValueRNG rng1 = UniformFloatWithValueRNG(-1.0f, 1.0f, 0.5f, nan);
     for (auto&& arg : args) {
         auto param = arg.param;
@@ -74,6 +100,19 @@ TEST_F(CUDA, FAKE_QUANT_BACKWARD) {
                 .set_dtype(4, dtype)
                 .execs(TensorShapeArray{
                         ishape, ishape, scale_shape, zeropoint_shape, ishape});
+        checker.set_allow_invalid_check(true);
+        checker.set_rng(0, &rng1);
+        checker.set_param(param)
+                .set_dtype(0, dtype)
+                .set_dtype(1, dtype)
+                .set_dtype(2, dtype)
+                .set_dtype(3, dtype)
+                .set_dtype(4, dtype)
+                .execs(TensorShapeArray{
+                        ishape, ishape, scale_shape, zeropoint_shape, ishape});
+        checker.set_rng(0, &rng);
+        checker.set_allow_invalid_check(false);
     }
     // test noncontiguous layout
     for (auto&& arg : args) {
@@ -93,6 +132,17 @@ TEST_F(CUDA, FAKE_QUANT_BACKWARD) {
                  {scale_shape, dtype::Float32()},
                  {zeropoint_shape, dtype::Float32()},
                  ilayout});
+        checker.set_allow_invalid_check(true);
+        checker.set_rng(0, &rng1);
+        checker.set_param(param).execl(
+                {ilayout, ilayout,
+                 {scale_shape, dtype::Float32()},
+                 {zeropoint_shape, dtype::Float32()},
+                 ilayout});
+        checker.set_rng(0, &rng);
+        checker.set_allow_invalid_check(false);
     }
 }
dnn/test/cuda/reduce.cpp
@@ -54,6 +54,20 @@ TEST_F(CUDA, REDUCE) {
     // very large reduce
     checker.execs({{1, 4194304, 1}, {}});
 
+    // inputs have nan
+    {
+        const auto nan = std::numeric_limits<float>::quiet_NaN();
+        UniformFloatWithValueRNG rng1 = UniformFloatWithValueRNG(-1.0f, 1.0f, 0.5f, nan);
+        checker.set_allow_invalid_check(true).set_rng(0, &rng1);
+        for (auto mode : {Mode::MIN, Mode::MAX}) {
+            checker.set_param({mode, 1});
+            checker.execs({{2, 64, 32}, {}});
+        }
+        checker.set_allow_invalid_check(false);
+    }
+    checker.set_rng(0, &rng);
+
     auto check = [&](Reduce::Mode mode, DType src_dtype, DType dst_dtype,
                      Reduce::DataType data_type) {
         for (int32_t axis : {0, 1, 2, 3}) {
imperative/python/test/unit/functional/test_math.py
@@ -21,7 +21,11 @@ def common_test_reduce(opr, ref_opr):
     data2_shape = (2, 9, 12)
     data1 = np.random.random(data1_shape).astype(np.float32)
     data2 = np.random.random(data2_shape).astype(np.float32)
-    cases = [{"input": data1}, {"input": data2}]
+    cases = [
+        {"input": data1},
+        {"input": data2},
+        {"input": np.array([[[1, 2, np.nan, 4], [8, 6, 5, 2], [2, 3, 4, 5]]])},
+    ]
 
     if opr not in (F.argmin, F.argmax):
         # test default axis
imperative/python/test/unit/quantization/test_fake_quant.py
@@ -143,6 +143,11 @@ def test_fakequant():
         assert np.allclose(x.grad.numpy(), x1.grad.numpy())
         assert make_shape_tuple(x.grad.shape) == make_shape_tuple(x1.grad.shape)
 
+        # test nan
+        x = F.full((1, 32, 3, 3), np.nan)
+        y = fake_quant_tensor(x, qparams).numpy()
+        assert np.isnan(y).all()
+
     zero_point = tensor([1.0], dtype=np.float32)
     scale = tensor([4.0], dtype=np.float32)
     run(zero_point, scale)