diff --git a/dnn/src/cuda/dct/opr_impl.h b/dnn/src/cuda/dct/opr_impl.h
index 60c899b118c94f0831e05a5aadbe208050f90d1a..cb10b390630d7f1ce917aa83bc82b799cd41e7b9 100644
--- a/dnn/src/cuda/dct/opr_impl.h
+++ b/dnn/src/cuda/dct/opr_impl.h
@@ -26,7 +26,7 @@ public:
     size_t get_workspace_in_bytes(const TensorLayout& /*src*/,
                                   const TensorLayout& /*mask_offset*/,
                                   const TensorLayout& /*mask_val*/,
-                                  const TensorLayout& /*dst*/) {
+                                  const TensorLayout& /*dst*/) override {
         return 0;
     };
     void set_error_tracker(void* tracker) override {
diff --git a/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h b/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h
index 245604de5174958acdf2d9cceba20a6c7308af18..92f1da1db81c1ac9b46846d79003b2d51ea8f4f1 100644
--- a/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h
+++ b/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h
@@ -43,7 +43,7 @@ public:
                                   const TensorLayout& /* out_diff */,
                                   const TensorLayout& /* out_count */,
                                   const TensorLayout& /* data_diff */,
-                                  const TensorLayout& /* trans_diff */) {
+                                  const TensorLayout& /* trans_diff */) override {
         return 0ULL;
     };
 
diff --git a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu
index c27d77a4f212cedcf48691dc99bc37a2160f1f09..53de8748405868db386fb3eff84f069d657c63ed 100644
--- a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu
+++ b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu
@@ -192,13 +192,11 @@ void megdnn::cuda::exec_span_qsum(const int32_t* qSumA, const uint32_t M,
                                   int32_t* dst, const uint32_t strd,
                                   const int32_t scaler_bias,
                                   cudaStream_t stream) {
-    constexpr size_t TX = 32, TY = 32;
-    constexpr size_t BX = 32, BY = 32;
+    constexpr uint32_t TX = 32, TY = 32, BX = 32, BY = 32;
     dim3 nthreads{TX, TY};
-    dim3 nblocks{static_cast<uint32_t>(DIVUP(N, BX)),
-                 static_cast<uint32_t>(DIVUP(M, BY))};
-    span_qsum<TX, TY, BX, BY><<<nblocks, nthreads, 0, stream>>>(qSumA, M, qSumB, N, dst, strd,
-                                                scaler_bias);
+    dim3 nblocks{DIVUP(N, BX), DIVUP(M, BY)};
+    span_qsum<TX, TY, BX, BY><<<nblocks, nthreads, 0, stream>>>(
+            qSumA, M, qSumB, N, dst, strd, scaler_bias);
     after_kernel_launch();
 }
 
diff --git a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu
index 0fb02c1e6b2bed7f636063ba59b0651214876443..4e524f6dd87f800d4212a5f9d67b02a554dd0b0f 100644
--- a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu
+++ b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu
@@ -46,10 +46,7 @@ using namespace wmma::experimental::precision;
 
 namespace wmma_matrix_mul_u4 {
 
-constexpr size_t WMMA_M = 8;
-constexpr size_t WMMA_N = 8;
-constexpr size_t WMMA_K = 32;
-constexpr size_t WARP_SIZE = 32;
+constexpr uint32_t WMMA_M = 8, WMMA_N = 8, WMMA_K = 32, WARP_SIZE = 32;
 
 template <size_t WARP_X_, size_t WARP_Y_, size_t ROW_PER_WARP_,
           size_t COL_PER_WARP_>
@@ -334,10 +331,8 @@ __global__ void u4_gemm_template_device_nt(const uint8_t* /*A*/,
 void _do_dispatch_wmma_matrix_mul_u4(const uint8_t* A, const uint8_t* B,
                                      int32_t* C, int M, int N, int K, int lda,
                                      int ldb, int ldc, cudaStream_t stream) {
-    constexpr size_t warp_x = 4;
-    constexpr size_t warp_y = 4;
-    constexpr size_t row_per_warp = 4;
-    constexpr size_t col_per_warp = 4;
+    constexpr uint32_t warp_x = 4, warp_y = 4, row_per_warp = 4,
+                       col_per_warp = 4;
     typedef BlockConfig<warp_x, warp_y, row_per_warp, col_per_warp>
             BlockConfig_;
     dim3 block{warp_x * WARP_SIZE, warp_y};
diff --git a/dnn/src/fallback/type_cvt/opr_impl.cpp b/dnn/src/fallback/type_cvt/opr_impl.cpp
index 04020f9d9671914f7225ce35f627689691e7b36c..e01054684936a1c2643ff50228052caf8a644f55 100644
--- a/dnn/src/fallback/type_cvt/opr_impl.cpp
+++ b/dnn/src/fallback/type_cvt/opr_impl.cpp
@@ -110,8 +110,10 @@ void do_cvt_normal_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
     float scale = dst.layout.dtype.param<dtype::QuantizedS32>().scale;
     float dscale = 1.f / scale;
     for (size_t i = 0; i < n; ++i) {
-        dptr[i] = saturate<int32_t, float>(std::round(sptr[i] * dscale),
-                                           -2147483648, 2147483647);
+        dptr[i] = saturate<int32_t, float>(
+                std::round(sptr[i] * dscale),
+                static_cast<float>(std::numeric_limits<int32_t>::min()),
+                static_cast<float>(std::numeric_limits<int32_t>::max()));
     }
 }
 
@@ -219,8 +221,10 @@ void do_cvt_s8_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
     float dst_scale = dst.layout.dtype.param<dtype::QuantizedS32>().scale;
     float scale = src_scale / dst_scale;
     for (size_t i = 0; i < n; ++i) {
-        dptr[i] = saturate<int32_t, float>(std::round(sptr[i] * scale),
-                                           -2147483648, 2147483647);
+        dptr[i] = saturate<int32_t, float>(
+                std::round(sptr[i] * scale),
+                static_cast<float>(std::numeric_limits<int32_t>::min()),
+                static_cast<float>(std::numeric_limits<int32_t>::max()));
     }
 }
 
@@ -232,8 +236,10 @@ void do_cvt_s32_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
     float dst_scale = dst.layout.dtype.param<dtype::QuantizedS32>().scale;
     float scale = src_scale / dst_scale;
     for (size_t i = 0; i < n; ++i) {
-        dptr[i] = saturate<int32_t, float>(std::round(sptr[i] * scale),
-                                           -2147483648, 2147483647);
+        dptr[i] = saturate<int32_t, float>(
+                std::round(sptr[i] * scale),
+                static_cast<float>(std::numeric_limits<int32_t>::min()),
+                static_cast<float>(std::numeric_limits<int32_t>::max()));
     }
 }
 
@@ -247,9 +253,10 @@ void do_cvt_asymm8_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
     float dst_scale = dst.layout.dtype.param<dtype::QuantizedS32>().scale;
     float scale = src_scale / dst_scale;
     for (size_t i = 0; i < n; ++i) {
-        dptr[i] =
-                saturate<int32_t, float>(std::round((sptr[i] - src_zp) * scale),
-                                         -2147483648, 2147483647);
+        dptr[i] = saturate<int32_t, float>(
+                std::round((sptr[i] - src_zp) * scale),
+                static_cast<float>(std::numeric_limits<int32_t>::min()),
+                static_cast<float>(std::numeric_limits<int32_t>::max()));
     }
 }
 
diff --git a/dnn/src/x86/quantized_converter.h b/dnn/src/x86/quantized_converter.h
index a0635b54f4a28087ec726879cc7f384706b4abee..96bbed986bf7003d0cd03ae545dfc9c0d14fc261 100644
--- a/dnn/src/x86/quantized_converter.h
+++ b/dnn/src/x86/quantized_converter.h
@@ -66,8 +66,10 @@ inline dt_quint8 QConverter::convert(const float& src, const uint8_t& zp) {
 
 template <>
 inline dt_qint32 QConverter::convert(const float& src) {
-    return dt_qint32(
-            saturate<int32_t, float>(std::round(src), -2147483648, 2147483647));
+    return dt_qint32(saturate<int32_t, float>(
+            std::round(src),
+            static_cast<float>(std::numeric_limits<int32_t>::min()),
+            static_cast<float>(std::numeric_limits<int32_t>::max())));
 }
 
 template <>
diff --git a/src/core/impl/graph/cg_impl_seq.h b/src/core/impl/graph/cg_impl_seq.h
index 0eee81ab210d30e5b8664d599e08ec1f7d2c54eb..625091bcdea4104fef5965f176b7f2e102680341 100644
--- a/src/core/impl/graph/cg_impl_seq.h
+++ b/src/core/impl/graph/cg_impl_seq.h
@@ -101,7 +101,8 @@ public:
     ComputingSequence(const std::shared_ptr<ComputingGraph>& graph)
             : m_owner_graph_refkeep{graph},
               m_owner_graph{ComputingGraphImpl::downcast(graph.get())},
-              m_have_parent_graph{m_owner_graph->m_parent_graph} {}
+              m_have_parent_graph{
+                      static_cast<bool>(m_owner_graph->m_parent_graph)} {}
 
     GraphExecutable::ExecEnv& exec_env() { return m_exec_env; }
 
diff --git a/src/opr/impl/internal/indexing_helper.cpp b/src/opr/impl/internal/indexing_helper.cpp
index 3b4520035b2c54ba6923daa11cc66988ab450e41..df6a81a9caca922ca2140e62df8c7bf989717986 100644
--- a/src/opr/impl/internal/indexing_helper.cpp
+++ b/src/opr/impl/internal/indexing_helper.cpp
@@ -371,9 +371,11 @@ serialization::IndexDescMaskDump::from_index_desc(const IndexDesc &desc) {
     ret.nr_item = desc.size();
     for (size_t i = 0; i < desc.size(); ++ i) {
         auto &&s = desc[i];
-        ret.items[i] = {
-            static_cast<int8_t>(s.axis.get_raw()),
-            s.begin.node(), s.end.node(), s.step.node(), s.idx.node()};
+        ret.items[i] = {static_cast<int8_t>(s.axis.get_raw()),
+                        static_cast<bool>(s.begin.node()),
+                        static_cast<bool>(s.end.node()),
+                        static_cast<bool>(s.step.node()),
+                        static_cast<bool>(s.idx.node())};
     }
     return ret;
 }