diff --git a/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h b/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h index 00a142ab5c16d04aae192e35fc285cf2486f0bb0..88d2c1536f762b5f1b65c292a0c3ae4c8ee8cb69 100644 --- a/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h +++ b/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h @@ -53,7 +53,9 @@ static inline void kern_4x4(const int8_t* packA, const int8_t* packB, int K, const int8_t* b_ptr = packB; LDC = LDC * sizeof(int8_t); -// clang-format off + + // clang-format off + #define STORE_LINE(reg0) \ "cmp w10, #0 \n" \ "beq 101f\n" \ diff --git a/dnn/src/arm_common/elemwise/opr_impl.h b/dnn/src/arm_common/elemwise/opr_impl.h index 17f5bda0a499a38866efd3c3ee468a64b05b2097..f22db09ca28f197a8c4e910c15fd84124ca550f3 100644 --- a/dnn/src/arm_common/elemwise/opr_impl.h +++ b/dnn/src/arm_common/elemwise/opr_impl.h @@ -10,7 +10,6 @@ * implied. */ #pragma once - #include "src/fallback/elemwise/opr_impl.h" #include "src/arm_common/elemwise_op.h" diff --git a/dnn/src/arm_common/matrix_mul/int8/gemv.cpp b/dnn/src/arm_common/matrix_mul/int8/gemv.cpp index d2b81d420fa64a567fbd9068f0ead4fbe3e6ff26..0170dfecd67e8b35cec61ab1593756bae41279df 100644 --- a/dnn/src/arm_common/matrix_mul/int8/gemv.cpp +++ b/dnn/src/arm_common/matrix_mul/int8/gemv.cpp @@ -10,6 +10,7 @@ */ #include "src/arm_common/simd_macro/marm_neon.h" + #include "src/arm_common/matrix_mul/int8/gemv.h" #include "src/common/utils.h" #include "megdnn/oprs.h" diff --git a/dnn/src/common/cv/interp_helper.cpp b/dnn/src/common/cv/interp_helper.cpp index d2cc60cdea70113d6c66672e8e7cb0258a8ae499..310d74d9bff1c79eae923dc87acfc79fdd609dc4 100644 --- a/dnn/src/common/cv/interp_helper.cpp +++ b/dnn/src/common/cv/interp_helper.cpp @@ -60,11 +60,8 @@ #pragma GCC diagnostic ignored "-Wnon-virtual-dtor" // TableHolderBase has no problem; ignore the warning for old clang versions -#include "./helper.h" #include "./interp_helper.h" -#include 
"src/common/utils.h" - using namespace megdnn; using namespace megdnn::megcv; diff --git a/dnn/src/common/cv/interp_helper.h b/dnn/src/common/cv/interp_helper.h index 922a14a4dac80b3631566029a53382f229c611ae..58cb27435e73447305849227f8e6a8e8720ea658 100644 --- a/dnn/src/common/cv/interp_helper.h +++ b/dnn/src/common/cv/interp_helper.h @@ -62,7 +62,9 @@ #pragma once #include "src/common/cv/aligned_allocator.h" +#include "src/common/utils.h" +#include "./helper.h" #include "megdnn/opr_param_defs.h" #include diff --git a/dnn/src/cuda/batch_conv_bias/helper.cuh b/dnn/src/cuda/batch_conv_bias/helper.cuh index 886a87582e5e47a99b66a8c9663d2453b256c23c..4434a67518ca70435de3819ed3dd007626f0ee4f 100644 --- a/dnn/src/cuda/batch_conv_bias/helper.cuh +++ b/dnn/src/cuda/batch_conv_bias/helper.cuh @@ -10,6 +10,7 @@ */ #pragma once #include "src/cuda/convolution_helper/parameter.cuh" +#include "src/cuda/utils.cuh" namespace megdnn { namespace cuda { diff --git a/dnn/src/cuda/concat/concat.cuh b/dnn/src/cuda/concat/concat.cuh index a3d555c481f12541611c581e8f4d967a133e8ff8..e0d50bafb49ad4f9064377b16e9a0432f61497d0 100644 --- a/dnn/src/cuda/concat/concat.cuh +++ b/dnn/src/cuda/concat/concat.cuh @@ -10,6 +10,7 @@ */ #pragma once #include +#include "src/cuda/utils.cuh" namespace megdnn { namespace cuda { diff --git a/dnn/src/cuda/dot/dot.cuh b/dnn/src/cuda/dot/dot.cuh index 10a78c0908ba2d19c85f305aad728c6eb8b6e806..579d1eb5a2faed917762a7246d9239103c5d8a10 100644 --- a/dnn/src/cuda/dot/dot.cuh +++ b/dnn/src/cuda/dot/dot.cuh @@ -10,6 +10,7 @@ */ #pragma once #include "megdnn/dtype.h" +#include "src/cuda/utils.cuh" namespace megdnn { namespace cuda { diff --git a/dnn/src/cuda/repeat/repeat.cuh b/dnn/src/cuda/repeat/repeat.cuh index 4d5782c1041be6430d0ff892c081ec091c27fbe2..c317a63e7c8308abeddedb50d0337b7496d39390 100644 --- a/dnn/src/cuda/repeat/repeat.cuh +++ b/dnn/src/cuda/repeat/repeat.cuh @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied. */ #pragma once +#include "src/cuda/utils.cuh" namespace megdnn { namespace cuda { diff --git a/dnn/src/cuda/tile/tile.cuh b/dnn/src/cuda/tile/tile.cuh index e0e746878f72ad657f7573790cdc0da69bfe765d..e773498260d10d643e09a13e10c5b6589fbab4ac 100644 --- a/dnn/src/cuda/tile/tile.cuh +++ b/dnn/src/cuda/tile/tile.cuh @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #pragma once +#include "src/cuda/utils.cuh" namespace megdnn { namespace cuda { diff --git a/dnn/src/cuda/warp_affine/common.cuh b/dnn/src/cuda/warp_affine/common.cuh index 3465d77df6dfb78793a93f4797648fad3f336f8a..d11a1061d6fa2722a5b6b55429dc133f0db0c202 100644 --- a/dnn/src/cuda/warp_affine/common.cuh +++ b/dnn/src/cuda/warp_affine/common.cuh @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #pragma once +#include "src/cuda/utils.cuh" namespace megdnn { namespace cuda { diff --git a/dnn/src/cuda/warp_perspective/common.cuh b/dnn/src/cuda/warp_perspective/common.cuh index 2ab6899dda5d3927b27976d641e5c742d07b92db..ef25cac32c98475ac6e789706558530ac8f83b5c 100644 --- a/dnn/src/cuda/warp_perspective/common.cuh +++ b/dnn/src/cuda/warp_perspective/common.cuh @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #pragma once +#include "src/cuda/utils.cuh" namespace megdnn { namespace cuda { diff --git a/dnn/src/rocm/handle.h b/dnn/src/rocm/handle.h index 367e9687c3991c8d7e290b4c11d2a1deb8374cb6..2fcce60c62bacf6d06f7fcb5f8d2d47287d3eb26 100644 --- a/dnn/src/rocm/handle.h +++ b/dnn/src/rocm/handle.h @@ -9,6 +9,8 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ #pragma once +#include "src/rocm/miopen_wrapper.h" + #include "megcore_rocm.h" #include "megdnn/basic_types.h" #include "megdnn/handle.h" @@ -16,7 +18,6 @@ #include "src/common/handle_impl.h" #include "src/common/utils.h" -#include "src/rocm/miopen_with_check.h" #include #include diff --git a/dnn/src/x86/avx_helper.h b/dnn/src/x86/avx_helper.h index 86974b7d2def9311386adf27eed61356befd02db..7830847b1a4541a881c6ab1c91382d89da00ada4 100644 --- a/dnn/src/x86/avx_helper.h +++ b/dnn/src/x86/avx_helper.h @@ -13,9 +13,11 @@ #include "megdnn/arch.h" #include +#ifdef WIN32 #include #include #include +#endif #if !defined (__clang__) #pragma GCC target ("avx") diff --git a/dnn/src/x86/conv_bias/f32/do_conv_stride2.h b/dnn/src/x86/conv_bias/f32/do_conv_stride2.h index 51b190efeb4a7e2c427c9a0658a9755774df84f5..b3acde4355a26c4172a317cd1db111327faf27be 100644 --- a/dnn/src/x86/conv_bias/f32/do_conv_stride2.h +++ b/dnn/src/x86/conv_bias/f32/do_conv_stride2.h @@ -9,7 +9,8 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ #pragma once - +// clang-format off #include "src/x86/simd_macro/sse_helper.h" #include "src/fallback/convolution/do_conv_stride2_decl.inl" #include "src/x86/simd_macro/sse_helper_epilogue.h" +// clang-format on diff --git a/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp b/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp index 0aa4d7d2d30d4ecf15b5e099bb552c3c7082c24e..c3ec563536b9564f581967521b0aa63288aa36ab 100644 --- a/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp @@ -801,8 +801,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp b/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp index 9fe74a55a081d74b6de1fb944924d685b127e428..8973586fcb7c53d2c9bf6623678f504a8cbc2a29 100644 --- a/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp @@ -896,8 +896,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp b/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp index fccebbc3531d652e7e2bd1c1c96587e0f8014b76..de3c15af706395ba437851fcd34fd53ebaf2898c 100644 --- a/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp @@ -943,8 +943,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp b/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp index bb313ceae191da1f15b2656a40a757af4ac5aa93..6859e6879da68122eb7a0bd00edccd1885a2cc97 100644 --- a/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp 
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp @@ -948,8 +948,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp b/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp index 36d40a9f4a2f757b34eb3a001582cd7d11d10943..5004a44968ae273f20e9099caadaf22af06d5b84 100644 --- a/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp @@ -917,8 +917,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp b/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp index 175bff87ff45a9ea0576a083c36c0f65eb3b2c90..d7481190899a52b2c417ead7edb6872fdc604b3d 100644 --- a/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp @@ -856,8 +856,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp b/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp index 043def68d3b5466230c0589de0fa46103d76c2f0..b0a49e018fc88cdd357a5d81edeb9e284e2032be 100644 --- a/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp @@ -771,8 +771,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp b/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp index 7072016f75d9f42605885166614a478229390d34..f652aafc963dee5006fd2b522148ffa113c33f48 100644 --- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp +++ 
b/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp @@ -788,8 +788,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp b/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp index 36a13994e9f0c2235eeb4292334c9b49a778fb39..71f411ade16d320b7a642b73ccd9589766dd6782 100644 --- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp @@ -872,8 +872,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp b/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp index 108fcaaaf6727f7ed15196bc1b79a11e3aaa03fd..49b75945f1213d359364c01d3a5663f335d49104 100644 --- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp @@ -910,8 +910,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp b/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp index b2d8663f8a5bf5b42acd93a97fab189c9f72d504..3887ee50a43dba7be4250d49a8f2f4dd892a72bf 100644 --- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp @@ -908,8 +908,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp b/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp index bcf0e501479c244f4ed12a7288672e57b9f2dd76..8e5dc0c54affa4fc978fdb8d7841abc897187637 100644 --- 
a/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp @@ -872,8 +872,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp b/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp index 9a460af3aacfb88b157314091605829878fc88a7..d5d94373be0bc152347c8aad619626f510087ec7 100644 --- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp @@ -808,8 +808,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp b/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp index 4071f97f65c750cb2a02a164209611a6653b97f7..557966972d254680c96457ed0dff33b72a8a3fe1 100644 --- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp +++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp @@ -722,8 +722,7 @@ } \ } while (0) -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp b/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp index 82a1d39b134e00f27d112f7664a8741c48ccaa59..b960667436ebe515a46d724c8e27e57237b9369c 100644 --- a/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp @@ -785,9 +785,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp b/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp index 
ffcced381f830563abffaa9b2aa5cd165453a8e7..b0b79545b0a21813e654b0a3d9c3281c83136571 100644 --- a/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp @@ -827,9 +827,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp b/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp index bf3ff6190410435b960cc5b8d13f3aacf74e42e8..0d31020c731c41a271b973dd91f30de44bd1d5f8 100644 --- a/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp @@ -842,9 +842,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp b/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp index 6e16fbecbbfc2275ba9adc48fb9f76b80e4a2591..7bfcec0fd0498784d1fb502e0968ca89dc02cc4f 100644 --- a/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp @@ -833,9 +833,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp b/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp index b683c814c2fe82458149eacba3fe827e9ccefd4c..0c9ba85bae825a428400b618b227249a843ca55d 100644 --- a/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp @@ -803,9 +803,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp 
b/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp index 892fdd740b3da77bc8133a276804054025872f1c..4c4c4634f9400b1bc039bf00a1568c275fa35ad4 100644 --- a/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp @@ -755,9 +755,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp b/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp index 69ea73557f3283d59e302fe3584a923e97ac477a..0fceb5aaafe21b1d4349ae4a4d9028d6b657a444 100644 --- a/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp @@ -692,9 +692,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp b/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp index 66502e636d448dfa5b7af11b581819fc5ee31d76..b8c3330ee0fda30b8da7653c122ac53e7e4e727e 100644 --- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp @@ -771,9 +771,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp b/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp index 3e3255d7e9e82b2d5473542a4900b47c199f8743..b39f59b993ccfc087136f540a6a715b823468dff 100644 --- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp @@ -801,9 +801,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git 
a/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp b/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp index 23daf4661dc9cc70ec7cd45cfe98092106a54668..3d219e4930b01863e97c562e6bc44c9b92ba10e3 100644 --- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp @@ -806,9 +806,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp b/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp index b502540a8f382f21801b2c09bc654826df0254e0..2999f4f6981a4c8c32b99a1eceeb818c07d133f7 100644 --- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp @@ -789,9 +789,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp b/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp index 948f265a25b4d38d723aeae60f268fa2826e8402..ff812dbe8474ead0e5911abddf2e99167012b051 100644 --- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp @@ -753,9 +753,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp b/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp index 0302cd2a2f2b1aed4c5373219470d2164063b65d..3f96514d2c43763ff9def61ad0b3b3fb178f43ea 100644 --- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp @@ -701,9 +701,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include 
"../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp b/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp index a422ba2a27c3a06625b36837541d6024bdfabc1d..25aef52e7ecedebad56dc96ecb8a6214e69262d6 100644 --- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp +++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp @@ -636,9 +636,7 @@ } \ } while (0) -#include -#include -#include +#include "src/x86/avx_helper.h" #include #include "../convolution_direct_special_cases.h" diff --git a/dnn/src/x86/local/local_avx.cpp b/dnn/src/x86/local/local_avx.cpp index 9c66a7c745508b0db8b74793901b1ea5c7575b6f..d6e4d440a85050dad2f51804e8abcbf9b1b0adc4 100644 --- a/dnn/src/x86/local/local_avx.cpp +++ b/dnn/src/x86/local/local_avx.cpp @@ -8,6 +8,8 @@ * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ +// clang-format off #include "src/x86/simd_helper.h" #include "src/x86/simd_macro/avx_helper.h" #include "src/common/local/local_def.inl" +// clang-format on diff --git a/dnn/src/x86/local/local_fma.cpp b/dnn/src/x86/local/local_fma.cpp index ceef48e2156941a539d6378196d3c0bf5ac4061b..00ddbc4bd3542cc6ca0de5c0d51fee8187d9513c 100644 --- a/dnn/src/x86/local/local_fma.cpp +++ b/dnn/src/x86/local/local_fma.cpp @@ -8,6 +8,8 @@ * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ +// clang-format off #include "src/x86/simd_helper.h" #include "src/x86/simd_macro/fma_helper.h" #include "src/common/local/local_def.inl" +// clang-format on diff --git a/dnn/src/x86/local/local_simd.h b/dnn/src/x86/local/local_simd.h index 8fe710c3413371bb583aa95d2f5c1405c1875383..46017375185014468ec2a8d3b5fdae9245af46bf 100644 --- a/dnn/src/x86/local/local_simd.h +++ b/dnn/src/x86/local/local_simd.h @@ -10,6 +10,7 @@ */ #pragma once +// clang-format off #include "src/x86/simd_macro/sse_helper.h" #include "src/common/local/local_decl.inl" #include "src/x86/simd_macro/sse_helper_epilogue.h" @@ -21,3 +22,4 @@ #include "src/x86/simd_macro/fma_helper.h" #include "src/common/local/local_decl.inl" #include "src/x86/simd_macro/fma_helper_epilogue.h" +// clang-format on diff --git a/dnn/src/x86/local/local_sse.cpp b/dnn/src/x86/local/local_sse.cpp index 14f2c4e13845f2463723525a35ef2953fd8cdc71..2acc0e92501b4e5eec9ae4dd1ba0198434eee64c 100644 --- a/dnn/src/x86/local/local_sse.cpp +++ b/dnn/src/x86/local/local_sse.cpp @@ -8,6 +8,8 @@ * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ +// clang-format off #include "src/x86/simd_helper.h" #include "src/x86/simd_macro/sse_helper.h" #include "src/common/local/local_def.inl" +// clang-format on diff --git a/dnn/src/x86/matrix_mul/common/common.h b/dnn/src/x86/matrix_mul/common/common.h index fb49e289ef0bac974aa6c34ae234420d68930117..d719ce28ef4742985d2ee9d7bd07cd9029d1c93f 100644 --- a/dnn/src/x86/matrix_mul/common/common.h +++ b/dnn/src/x86/matrix_mul/common/common.h @@ -11,7 +11,6 @@ */ #pragma once #include - #ifdef WIN32 #include #include diff --git a/dnn/src/x86/simd_helper.h b/dnn/src/x86/simd_helper.h index c9cc0a9940514196976b60c8db06c98ae7f2caae..f27b9117026f9fba3e29c764cdd90eefd16891b0 100644 --- a/dnn/src/x86/simd_helper.h +++ b/dnn/src/x86/simd_helper.h @@ -13,9 +13,11 @@ #include "megdnn/arch.h" #include +#ifdef WIN32 #include #include #include +#endif #include #include diff --git a/imperative/tablegen/emitter.h b/imperative/tablegen/emitter.h index 256da9701ed1aaf57b5993d2f20c9d09b7c0cad8..019bc12bcfe092a97640e72e56c31d4a877d02a9 100644 --- a/imperative/tablegen/emitter.h +++ b/imperative/tablegen/emitter.h @@ -17,6 +17,7 @@ #include "llvm/Support/raw_ostream.h" namespace mlir::tblgen { +using llvm::raw_ostream; struct Environment { std::unordered_map> enumAlias; @@ -37,4 +38,4 @@ protected: Environment* env_p = nullptr; }; -} // namespace mlir::tblgen \ No newline at end of file +} // namespace mlir::tblgen diff --git a/imperative/tablegen/targets/macros.cpp b/imperative/tablegen/targets/macros.cpp index 9df4256b1f1985e56ea2950d25de93707478c3ea..f355b3a4235dfdbc8e793180e1e4873e00c27b81 100644 --- a/imperative/tablegen/targets/macros.cpp +++ b/imperative/tablegen/targets/macros.cpp @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ +#include "./macros.h" #include "./cpp_class.h" #include "../emitter.h" diff --git a/src/core/impl/graph/var_node_mem_mgr.cpp b/src/core/impl/graph/var_node_mem_mgr.cpp index c5e65a8a7cba821ce9fb99229ef1b59901fd54f1..9babf8ce1559522b8165114365fb2b454e684194 100644 --- a/src/core/impl/graph/var_node_mem_mgr.cpp +++ b/src/core/impl/graph/var_node_mem_mgr.cpp @@ -125,7 +125,7 @@ StaticDeviceMemoryManager::make_default_impl() { #endif // MGB_THREAD_SAFE /* ==================== AsyncVarReleaser ==================== */ -#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM +#if MGB_COMMON_ASYNC_COMPNODE class VarNodeMemManager::AsyncVarReleaser { struct WaiterParam { CompNode cn; @@ -248,7 +248,7 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() { VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) : m_owner_graph(graph), m_seq_mem_opt(graph) -#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM +#if MGB_COMMON_ASYNC_COMPNODE ,m_asyn_var_releaser(new AsyncVarReleaser) #endif { @@ -256,7 +256,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) MGB_MARK_USED_VAR(ev); // async release is only used for sync between multiple comp nodes, and // does not wait for device to finish -#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM +#if MGB_COMMON_ASYNC_COMPNODE m_asyn_var_releaser->wait_release_finish(); #endif m_cpu_async_release_barrier.wait_zero(); @@ -297,8 +297,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) graph->event().register_receiver_permanent( on_comp_seq_error); -#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && \ - (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM) +#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && MGB_COMMON_ASYNC_COMPNODE auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) { m_asyn_var_releaser->wait_release_finish(); }; diff --git a/src/core/impl/graph/var_node_mem_mgr.h b/src/core/impl/graph/var_node_mem_mgr.h index 
953f23c1b72fbccfb3e52d5a193f2d86cea84ec4..30e9558870c75d69475009a5aec2fa30458cab74 100644 --- a/src/core/impl/graph/var_node_mem_mgr.h +++ b/src/core/impl/graph/var_node_mem_mgr.h @@ -445,7 +445,12 @@ class VarNodeMemManager { SyncableCounter m_cpu_async_release_barrier; -#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM +// clang-format off +#define MGB_COMMON_ASYNC_COMPNODE \ + (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM) + // clang-format on + +#if MGB_COMMON_ASYNC_COMPNODE //! release dynamic var on after compnode event finishes class AsyncVarReleaser; std::unique_ptr m_asyn_var_releaser; diff --git a/src/core/include/megbrain/utils/thread_impl_spinlock.h b/src/core/include/megbrain/utils/thread_impl_spinlock.h index f3cab5603d6722fb040d8151ae4a070f5ea4bb9c..315ef6512bc4a37c591d408a30d8286a07bedda7 100644 --- a/src/core/include/megbrain/utils/thread_impl_spinlock.h +++ b/src/core/include/megbrain/utils/thread_impl_spinlock.h @@ -14,6 +14,7 @@ #include "megbrain/common.h" #include #include +#include "megbrain/utils/metahelper.h" namespace mgb { @@ -24,7 +25,7 @@ class Spinlock final: public NonCopyableObj { public: void lock() { - while (m_state.test_and_set(std::memory_order_acquire)); + while (m_state.test_and_set(std::memory_order_acquire)) {}; } void unlock() { diff --git a/src/opr/include/megbrain/opr/basic_arith.h b/src/opr/include/megbrain/opr/basic_arith.h index 69edb1b753f4aa79f0036e5a43522e65af59ca88..8acbc2efa0b3ca5cf6c2e9847274463d8216092b 100644 --- a/src/opr/include/megbrain/opr/basic_arith.h +++ b/src/opr/include/megbrain/opr/basic_arith.h @@ -281,8 +281,8 @@ MGB_DEFINE_OPR_CLASS(AddUpdate, * Mode specifies the actual arithmetic; and exactly one of *axis* and * *target_shape* must be provided, to specify output shape. 
*/ -MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic< - intl::OutshapeBySymvarSCNOpr>) // { +MGB_DEFINE_OPR_CLASS(Reduce, + intl::DynamicOutputIfInputDynamic>) // { public: using Param = megdnn::param::Reduce; @@ -350,16 +350,17 @@ MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic< * the optimizer. */ MGB_DEFINE_OPR_CLASS(PowC, intl::MegDNNOprWrapperFwd) // { +public: + PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config); + static SymbolVar make(SymbolVar inp, const Param& param = {}, + const OperatorNodeConfig& config = {}); + +private: void add_input_layout_constraint() override; void init_output_static_infer_desc() override; void mem_plan_fwd_in2out_writable() override; NodeProp* do_make_node_prop() const override; void scn_do_execute() override; - -public: - PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar inp, const Param& param = {}, - const OperatorNodeConfig& config = {}); }; } // namespace opr diff --git a/src/opr/test/atlas_models.h b/src/opr/test/atlas_models.h index 05829c2b683842b1377d13dd5afbf01bd3694556..af7d1be75f93bd072c497685f7ca1ede9c4e84ca 100644 --- a/src/opr/test/atlas_models.h +++ b/src/opr/test/atlas_models.h @@ -1,4 +1,5 @@ -//generated by tools/atlas/embed.py +// generated by tools/atlas/embed.py +// clang-format off #pragma once #include #include