chore(format): fix compile bugs after code format

GitOrigin-RevId: 11a4b06f6fb0fb6353c85b9c76c0139ab30c158d

chore(format): fix compile bugs after code format
GitOrigin-RevId: 11a4b06f6fb0fb6353c85b9c76c0139ab30c158d
bfb30dcb · Megvii Engine Team · eeccf2bc · bfb30dcb · bfb30dcb · bfb30dcb
56 changed files
--- a/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h
+++ b/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h
@@ -53,7 +53,9 @@ static inline void kern_4x4(const int8_t* packA, const int8_t* packB, int K,
    const int8_t* b_ptr = packB;
    LDC = LDC * sizeof(int8_t);
-// clang-format off
+    // clang-format off
    #define STORE_LINE(reg0)                 \
    "cmp w10, #0 \n"                         \
    "beq 101f\n"                             \

--- a/dnn/src/arm_common/elemwise/opr_impl.h
+++ b/dnn/src/arm_common/elemwise/opr_impl.h
@@ -10,7 +10,6 @@
 * implied.
 */
 #pragma once
 #include "src/fallback/elemwise/opr_impl.h"
 #include "src/arm_common/elemwise_op.h"

--- a/dnn/src/arm_common/matrix_mul/int8/gemv.cpp
+++ b/dnn/src/arm_common/matrix_mul/int8/gemv.cpp
@@ -10,6 +10,7 @@
 */
 #include "src/arm_common/simd_macro/marm_neon.h"
 #include "src/arm_common/matrix_mul/int8/gemv.h"
 #include "src/common/utils.h"
 #include "megdnn/oprs.h"

--- a/dnn/src/common/cv/interp_helper.cpp
+++ b/dnn/src/common/cv/interp_helper.cpp
@@ -60,11 +60,8 @@
 #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
 // TableHolderBase has no problem; ignore the warning for old clang versions
-#include "./helper.h"
 #include "./interp_helper.h"
-#include "src/common/utils.h"
 using namespace megdnn;
 using namespace megdnn::megcv;

--- a/dnn/src/common/cv/interp_helper.h
+++ b/dnn/src/common/cv/interp_helper.h
@@ -62,7 +62,9 @@
 #pragma once
 #include "src/common/cv/aligned_allocator.h"
+#include "src/common/utils.h"
+#include "./helper.h"
 #include "megdnn/opr_param_defs.h"
 #include <cstdint>

--- a/dnn/src/cuda/batch_conv_bias/helper.cuh
+++ b/dnn/src/cuda/batch_conv_bias/helper.cuh
@@ -10,6 +10,7 @@
 */
 #pragma once
 #include "src/cuda/convolution_helper/parameter.cuh"
+#include "src/cuda/utils.cuh"
 namespace megdnn {
 namespace cuda {

--- a/dnn/src/cuda/concat/concat.cuh
+++ b/dnn/src/cuda/concat/concat.cuh
@@ -10,6 +10,7 @@
 */
 #pragma once
 #include <stdint.h>
+#include "src/cuda/utils.cuh"
 namespace megdnn {
 namespace cuda {

--- a/dnn/src/cuda/dot/dot.cuh
+++ b/dnn/src/cuda/dot/dot.cuh
@@ -10,6 +10,7 @@
 */
 #pragma once
 #include "megdnn/dtype.h"
+#include "src/cuda/utils.cuh"
 namespace megdnn {
 namespace cuda {

--- a/dnn/src/cuda/repeat/repeat.cuh
+++ b/dnn/src/cuda/repeat/repeat.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
+#include "src/cuda/utils.cuh"
 namespace megdnn {
 namespace cuda {

--- a/dnn/src/cuda/tile/tile.cuh
+++ b/dnn/src/cuda/tile/tile.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
+#include "src/cuda/utils.cuh"
 namespace megdnn {
 namespace cuda {

--- a/dnn/src/cuda/warp_affine/common.cuh
+++ b/dnn/src/cuda/warp_affine/common.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
+#include "src/cuda/utils.cuh"
 namespace megdnn {
 namespace cuda {

--- a/dnn/src/cuda/warp_perspective/common.cuh
+++ b/dnn/src/cuda/warp_perspective/common.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
+#include "src/cuda/utils.cuh"
 namespace megdnn {
 namespace cuda {

--- a/dnn/src/rocm/handle.h
+++ b/dnn/src/rocm/handle.h
@@ -9,6 +9,8 @@
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
+#include "src/rocm/miopen_wrapper.h"
 #include "megcore_rocm.h"
 #include "megdnn/basic_types.h"
 #include "megdnn/handle.h"
@@ -16,7 +18,6 @@
 #include "src/common/handle_impl.h"
 #include "src/common/utils.h"
-#include "src/rocm/miopen_with_check.h"
 #include <rocblas.h>
 #include <atomic>

--- a/dnn/src/x86/avx_helper.h
+++ b/dnn/src/x86/avx_helper.h
@@ -13,9 +13,11 @@
 #include "megdnn/arch.h"
 #include <immintrin.h>
+#ifdef WIN32
 #include <avxintrin.h>
 #include <avx2intrin.h>
 #include <fmaintrin.h>
+#endif
 #if !defined (__clang__)
 #pragma GCC target ("avx")

--- a/dnn/src/x86/conv_bias/f32/do_conv_stride2.h
+++ b/dnn/src/x86/conv_bias/f32/do_conv_stride2.h
@@ -9,7 +9,8 @@
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
+// clang-format off
 #include "src/x86/simd_macro/sse_helper.h"
 #include "src/fallback/convolution/do_conv_stride2_decl.inl"
 #include "src/x86/simd_macro/sse_helper_epilogue.h"
+// clang-format on
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp
@@ -801,8 +801,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp
@@ -896,8 +896,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp
@@ -943,8 +943,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp
@@ -948,8 +948,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp
@@ -917,8 +917,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp
@@ -856,8 +856,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp
@@ -771,8 +771,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp
@@ -788,8 +788,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp
@@ -872,8 +872,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp
@@ -910,8 +910,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp
@@ -908,8 +908,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp
@@ -872,8 +872,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp
@@ -808,8 +808,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp
@@ -722,8 +722,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp
@@ -785,9 +785,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp
@@ -827,9 +827,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp
@@ -842,9 +842,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp
@@ -833,9 +833,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp
@@ -803,9 +803,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp
@@ -755,9 +755,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp
@@ -692,9 +692,7 @@
        }                                                     \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp
@@ -771,9 +771,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp
@@ -801,9 +801,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp
@@ -806,9 +806,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp
@@ -789,9 +789,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp
@@ -753,9 +753,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp
@@ -701,9 +701,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp
@@ -636,9 +636,7 @@
        }                                                                  \
    } while (0)
-#include <immintrin.h>
+#include "src/x86/avx_helper.h"
-#include <avxintrin.h>
-#include <fmaintrin.h>
 #include <algorithm>
 #include "../convolution_direct_special_cases.h"

--- a/dnn/src/x86/local/local_avx.cpp
+++ b/dnn/src/x86/local/local_avx.cpp
@@ -8,6 +8,8 @@
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
+// clang-format off
 #include "src/x86/simd_helper.h"
 #include "src/x86/simd_macro/avx_helper.h"
 #include "src/common/local/local_def.inl"
+// clang-format on
--- a/dnn/src/x86/local/local_fma.cpp
+++ b/dnn/src/x86/local/local_fma.cpp
@@ -8,6 +8,8 @@
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
+// clang-format off
 #include "src/x86/simd_helper.h"
 #include "src/x86/simd_macro/fma_helper.h"
 #include "src/common/local/local_def.inl"
+// clang-format on
--- a/dnn/src/x86/local/local_simd.h
+++ b/dnn/src/x86/local/local_simd.h
@@ -10,6 +10,7 @@
 */
 #pragma once
+// clang-format off
 #include "src/x86/simd_macro/sse_helper.h"
 #include "src/common/local/local_decl.inl"
 #include "src/x86/simd_macro/sse_helper_epilogue.h"
@@ -21,3 +22,4 @@
 #include "src/x86/simd_macro/fma_helper.h"
 #include "src/common/local/local_decl.inl"
 #include "src/x86/simd_macro/fma_helper_epilogue.h"
+// clang-format on
--- a/dnn/src/x86/local/local_sse.cpp
+++ b/dnn/src/x86/local/local_sse.cpp
@@ -8,6 +8,8 @@
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
+// clang-format off
 #include "src/x86/simd_helper.h"
 #include "src/x86/simd_macro/sse_helper.h"
 #include "src/common/local/local_def.inl"
+// clang-format on
--- a/dnn/src/x86/matrix_mul/common/common.h
+++ b/dnn/src/x86/matrix_mul/common/common.h
@@ -11,7 +11,6 @@
 */
 #pragma once
 #include <x86intrin.h>
 #ifdef WIN32
 #include <avx2intrin.h>
 #include <avxintrin.h>

--- a/dnn/src/x86/simd_helper.h
+++ b/dnn/src/x86/simd_helper.h
@@ -13,9 +13,11 @@
 #include "megdnn/arch.h"
 #include <immintrin.h>
+#ifdef WIN32
 #include <xmmintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
+#endif
 #include <cmath>
 #include <algorithm>

--- a/imperative/tablegen/emitter.h
+++ b/imperative/tablegen/emitter.h
@@ -17,6 +17,7 @@
 #include "llvm/Support/raw_ostream.h"
 namespace mlir::tblgen {
+using llvm::raw_ostream;
 struct Environment {
    std::unordered_map<unsigned int, std::pair<llvm::StringRef, llvm::StringRef>> enumAlias;
@@ -37,4 +38,4 @@ protected:
    Environment* env_p = nullptr;
 };
 } // namespace mlir::tblgen
\ No newline at end of file
--- a/imperative/tablegen/targets/macros.cpp
+++ b/imperative/tablegen/targets/macros.cpp
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
+#include "./macros.h"
 #include "./cpp_class.h"
 #include "../emitter.h"

--- a/src/core/impl/graph/var_node_mem_mgr.cpp
+++ b/src/core/impl/graph/var_node_mem_mgr.cpp
@@ -125,7 +125,7 @@ StaticDeviceMemoryManager::make_default_impl() {
 #endif  // MGB_THREAD_SAFE
 /* ==================== AsyncVarReleaser ==================== */
-#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
+#if MGB_COMMON_ASYNC_COMPNODE
 class VarNodeMemManager::AsyncVarReleaser {
    struct WaiterParam {
        CompNode cn;
@@ -248,7 +248,7 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() {
 VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph)
        : m_owner_graph(graph),
          m_seq_mem_opt(graph)
-#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
+#if MGB_COMMON_ASYNC_COMPNODE
          ,m_asyn_var_releaser(new AsyncVarReleaser)
 #endif
 {
@@ -256,7 +256,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph)
        MGB_MARK_USED_VAR(ev);
        // async release is only used for sync between multiple comp nodes, and
        // does not wait for device to finish
-#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
+#if MGB_COMMON_ASYNC_COMPNODE
        m_asyn_var_releaser->wait_release_finish();
 #endif
        m_cpu_async_release_barrier.wait_zero();
@@ -297,8 +297,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph)
    graph->event().register_receiver_permanent<event::CompSeqExecError>(
            on_comp_seq_error);
-#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER &&                                   \
+#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && MGB_COMMON_ASYNC_COMPNODE
-        (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM)
    auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) {
        m_asyn_var_releaser->wait_release_finish();
    };

--- a/src/core/impl/graph/var_node_mem_mgr.h
+++ b/src/core/impl/graph/var_node_mem_mgr.h
@@ -445,7 +445,12 @@ class VarNodeMemManager {
        SyncableCounter m_cpu_async_release_barrier;
-#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
+// clang-format off
+#define MGB_COMMON_ASYNC_COMPNODE \
+    (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM)
+    // clang-format on
+#if MGB_COMMON_ASYNC_COMPNODE
        //! release dynamic var on after compnode event finishes
        class AsyncVarReleaser;
        std::unique_ptr<AsyncVarReleaser> m_asyn_var_releaser;

--- a/src/core/include/megbrain/utils/thread_impl_spinlock.h
+++ b/src/core/include/megbrain/utils/thread_impl_spinlock.h
@@ -14,6 +14,7 @@
 #include "megbrain/common.h"
 #include <thread>
 #include <atomic>
+#include "megbrain/utils/metahelper.h"
 namespace mgb {
@@ -24,7 +25,7 @@ class Spinlock final: public NonCopyableObj {
    public:
        void lock() {
-            while (m_state.test_and_set(std::memory_order_acquire));
+            while (m_state.test_and_set(std::memory_order_acquire)) {};
        }
        void unlock() {

--- a/src/opr/include/megbrain/opr/basic_arith.h
+++ b/src/opr/include/megbrain/opr/basic_arith.h
@@ -281,8 +281,8 @@ MGB_DEFINE_OPR_CLASS(AddUpdate,
 * Mode specifies the actual arithmetic; and exactly one of *axis* and
 * *target_shape* must be provided, to specify output shape.
 */
-MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic<
+MGB_DEFINE_OPR_CLASS(Reduce,
-        intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolder>>) //  {
+        intl::DynamicOutputIfInputDynamic<intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolder>>) // {
    public:
        using Param = megdnn::param::Reduce;
@@ -350,16 +350,17 @@ MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic<
 * the optimizer.
 */
 MGB_DEFINE_OPR_CLASS(PowC, intl::MegDNNOprWrapperFwd<megdnn::PowC>) // {
+public:
+    PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config);
+    static SymbolVar make(SymbolVar inp, const Param& param = {},
+                          const OperatorNodeConfig& config = {});
+private:
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void mem_plan_fwd_in2out_writable() override;
    NodeProp* do_make_node_prop() const override;
    void scn_do_execute() override;
-public:
-    PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config);
-    static SymbolVar make(SymbolVar inp, const Param& param = {},
-                          const OperatorNodeConfig& config = {});
 };
 } // namespace opr

--- a/src/opr/test/atlas_models.h
+++ b/src/opr/test/atlas_models.h
-//generated by tools/atlas/embed.py
+// generated by tools/atlas/embed.py
+// clang-format off
 #pragma once
 #include <map>
 #include <string>