diff --git a/dnn/src/cuda/dct/opr_impl.h b/dnn/src/cuda/dct/opr_impl.h index 60c899b118c94f0831e05a5aadbe208050f90d1a..cb10b390630d7f1ce917aa83bc82b799cd41e7b9 100644 --- a/dnn/src/cuda/dct/opr_impl.h +++ b/dnn/src/cuda/dct/opr_impl.h @@ -26,7 +26,7 @@ public: size_t get_workspace_in_bytes(const TensorLayout& /*src*/, const TensorLayout& /*mask_offset*/, const TensorLayout& /*mask_val*/, - const TensorLayout& /*dst*/) { + const TensorLayout& /*dst*/) override { return 0; }; void set_error_tracker(void* tracker) override { diff --git a/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h b/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h index 245604de5174958acdf2d9cceba20a6c7308af18..92f1da1db81c1ac9b46846d79003b2d51ea8f4f1 100644 --- a/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h +++ b/dnn/src/cuda/deformable_ps_roi_pooling/opr_impl.h @@ -43,7 +43,7 @@ public: const TensorLayout& /* out_diff */, const TensorLayout& /* out_count */, const TensorLayout& /* data_diff */, - const TensorLayout& /* trans_diff */) { + const TensorLayout& /* trans_diff */) override { return 0ULL; }; diff --git a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu index c27d77a4f212cedcf48691dc99bc37a2160f1f09..53de8748405868db386fb3eff84f069d657c63ed 100644 --- a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu +++ b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/preprocess_quantize_sum.cu @@ -192,13 +192,11 @@ void megdnn::cuda::exec_span_qsum(const int32_t* qSumA, const uint32_t M, int32_t* dst, const uint32_t strd, const int32_t scaler_bias, cudaStream_t stream) { - constexpr size_t TX = 32, TY = 32; - constexpr size_t BX = 32, BY = 32; + constexpr uint32_t TX = 32, TY = 32, BX = 32, BY = 32; dim3 nthreads{TX, TY}; - dim3 nblocks{static_cast(DIVUP(N, BX)), - static_cast(DIVUP(M, BY))}; - span_qsum<<>>(qSumA, M, qSumB, N, dst, strd, - scaler_bias); + dim3 nblocks{DIVUP(N, BX), DIVUP(M, BY)}; + span_qsum<<>>( + qSumA, M, qSumB, N, dst, strd, scaler_bias); after_kernel_launch(); } diff --git a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu index 0fb02c1e6b2bed7f636063ba59b0651214876443..4e524f6dd87f800d4212a5f9d67b02a554dd0b0f 100644 --- a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu +++ b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu @@ -46,10 +46,7 @@ using namespace wmma::experimental::precision; namespace wmma_matrix_mul_u4 { -constexpr size_t WMMA_M = 8; -constexpr size_t WMMA_N = 8; -constexpr size_t WMMA_K = 32; -constexpr size_t WARP_SIZE = 32; +constexpr uint32_t WMMA_M = 8, WMMA_N = 8, WMMA_K = 32, WARP_SIZE = 32; template @@ -334,10 +331,8 @@ __global__ void u4_gemm_template_device_nt(const uint8_t* /*A*/, void _do_dispatch_wmma_matrix_mul_u4(const uint8_t* A, const uint8_t* B, int32_t* C, int M, int N, int K, int lda, int ldb, int ldc, cudaStream_t stream) { - constexpr size_t warp_x = 4; - constexpr size_t warp_y = 4; - constexpr size_t row_per_warp = 4; - constexpr size_t col_per_warp = 4; + constexpr uint32_t warp_x = 4, warp_y = 4, row_per_warp = 4, + col_per_warp = 4; typedef BlockConfig BlockConfig_; dim3 block{warp_x * WARP_SIZE, warp_y}; diff --git a/dnn/src/fallback/type_cvt/opr_impl.cpp b/dnn/src/fallback/type_cvt/opr_impl.cpp index 04020f9d9671914f7225ce35f627689691e7b36c..e01054684936a1c2643ff50228052caf8a644f55 100644 --- a/dnn/src/fallback/type_cvt/opr_impl.cpp +++ b/dnn/src/fallback/type_cvt/opr_impl.cpp @@ -110,8 +110,10 @@ void do_cvt_normal_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) { float scale = dst.layout.dtype.param().scale; float dscale = 1.f / scale; for (size_t i = 0; i < n; ++i) { - dptr[i] = saturate(std::round(sptr[i] * dscale), - -2147483648, 2147483647); + dptr[i] = saturate( + std::round(sptr[i] * dscale), + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max())); } } @@ -219,8 +221,10 @@ void do_cvt_s8_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) { float dst_scale = dst.layout.dtype.param().scale; float scale = src_scale / dst_scale; for (size_t i = 0; i < n; ++i) { - dptr[i] = saturate(std::round(sptr[i] * scale), - -2147483648, 2147483647); + dptr[i] = saturate( + std::round(sptr[i] * scale), + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max())); } } @@ -232,8 +236,10 @@ void do_cvt_s32_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) { float dst_scale = dst.layout.dtype.param().scale; float scale = src_scale / dst_scale; for (size_t i = 0; i < n; ++i) { - dptr[i] = saturate(std::round(sptr[i] * scale), - -2147483648, 2147483647); + dptr[i] = saturate( + std::round(sptr[i] * scale), + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max())); } } @@ -247,9 +253,10 @@ void do_cvt_asymm8_s32(_megdnn_tensor_in src, _megdnn_tensor_out dst) { float dst_scale = dst.layout.dtype.param().scale; float scale = src_scale / dst_scale; for (size_t i = 0; i < n; ++i) { - dptr[i] = - saturate(std::round((sptr[i] - src_zp) * scale), - -2147483648, 2147483647); + dptr[i] = saturate( + std::round((sptr[i] - src_zp) * scale), + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max())); } } diff --git a/dnn/src/x86/quantized_converter.h b/dnn/src/x86/quantized_converter.h index a0635b54f4a28087ec726879cc7f384706b4abee..96bbed986bf7003d0cd03ae545dfc9c0d14fc261 100644 --- a/dnn/src/x86/quantized_converter.h +++ b/dnn/src/x86/quantized_converter.h @@ -66,8 +66,10 @@ inline dt_quint8 QConverter::convert(const float& src, const uint8_t& zp) { template <> inline dt_qint32 QConverter::convert(const float& src) { - return dt_qint32( - saturate(std::round(src), -2147483648, 2147483647)); + return dt_qint32(saturate( + std::round(src), + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max()))); } template <> diff --git a/src/core/impl/graph/cg_impl_seq.h b/src/core/impl/graph/cg_impl_seq.h index 0eee81ab210d30e5b8664d599e08ec1f7d2c54eb..625091bcdea4104fef5965f176b7f2e102680341 100644 --- a/src/core/impl/graph/cg_impl_seq.h +++ b/src/core/impl/graph/cg_impl_seq.h @@ -101,7 +101,8 @@ public: ComputingSequence(const std::shared_ptr& graph) : m_owner_graph_refkeep{graph}, m_owner_graph{ComputingGraphImpl::downcast(graph.get())}, - m_have_parent_graph{m_owner_graph->m_parent_graph} {} + m_have_parent_graph{ + static_cast(m_owner_graph->m_parent_graph)} {} GraphExecutable::ExecEnv& exec_env() { return m_exec_env; } diff --git a/src/opr/impl/internal/indexing_helper.cpp b/src/opr/impl/internal/indexing_helper.cpp index 3b4520035b2c54ba6923daa11cc66988ab450e41..df6a81a9caca922ca2140e62df8c7bf989717986 100644 --- a/src/opr/impl/internal/indexing_helper.cpp +++ b/src/opr/impl/internal/indexing_helper.cpp @@ -371,9 +371,11 @@ serialization::IndexDescMaskDump::from_index_desc(const IndexDesc &desc) { ret.nr_item = desc.size(); for (size_t i = 0; i < desc.size(); ++ i) { auto &&s = desc[i]; - ret.items[i] = { - static_cast(s.axis.get_raw()), - s.begin.node(), s.end.node(), s.step.node(), s.idx.node()}; + ret.items[i] = {static_cast(s.axis.get_raw()), + static_cast(s.begin.node()), + static_cast(s.end.node()), + static_cast(s.step.node()), + static_cast(s.idx.node())}; } return ret; }