diff --git a/dnn/src/fallback/general_intrinsic/gi_common.h b/dnn/src/fallback/general_intrinsic/gi_common.h index df076993af21d2fd020bfae152c2198ea0ed5313..5efca76fa4ffd8bbd8d000109947d58f26f5855c 100644 --- a/dnn/src/fallback/general_intrinsic/gi_common.h +++ b/dnn/src/fallback/general_intrinsic/gi_common.h @@ -60,15 +60,15 @@ #define GI_NEON32_INTRINSICS #endif #elif defined(GI_TARGET_X86) -//#if defined(__FMA__) -//#define GI_FMA_INTRINSICS -//#define GI_AVX2_INTRINSICS -//#define GI_AVX_INTRINSICS -//#elif defined(__AVX2__) -//#define GI_AVX2_INTRINSICS -//#define GI_AVX_INTRINSICS -//#elif defined(__AVX__) -//#define GI_AVX_INTRINSICS +// #if defined(__FMA__) +// #define GI_FMA_INTRINSICS +// #define GI_AVX2_INTRINSICS +// #define GI_AVX_INTRINSICS +// #elif defined(__AVX2__) +// #define GI_AVX2_INTRINSICS +// #define GI_AVX_INTRINSICS +// #elif defined(__AVX__) +// #define GI_AVX_INTRINSICS #if defined(__SSE4_2__) #define GI_SSE42_INTRINSICS #define GI_SSE2_INTRINSICS @@ -94,7 +94,7 @@ //! Gi fp16 only support arm64 neon and rvv #if (defined(GI_NEON_INTRINSICS) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && \ - MEGDNN_AARCH64) || \ + defined(__aarch64__)) || \ defined(GI_RVV_INTRINSICS) #define GI_SUPPORT_F16 #endif diff --git a/dnn/src/fallback/general_intrinsic/gi_float16.h b/dnn/src/fallback/general_intrinsic/gi_float16.h index f85997208fa754222e09c7f0136d909f126e7868..3359c6b147fe333070ae43e781ea27cd8703b785 100644 --- a/dnn/src/fallback/general_intrinsic/gi_float16.h +++ b/dnn/src/fallback/general_intrinsic/gi_float16.h @@ -34,7 +34,7 @@ GI_FLOAT16_t GiLoadBroadcastFloat16(const gi_float16_t* Value) { } GI_FORCEINLINE -GI_FLOAT32_V2_t GiCastFloat16ToFloat32(const GI_FLOAT16_t& fp16) { +GI_FLOAT32_V2_t GiCastFloat16ToFloat32(const GI_FLOAT16_t fp16) { #if defined(GI_NEON_INTRINSICS) GI_FLOAT32_V2_t ret; GiSetSubVectorFloat32V2(ret, 0, vcvt_f32_f16(vget_low_f16(fp16))); @@ -51,7 +51,7 @@ GI_FLOAT32_V2_t GiCastFloat16ToFloat32(const GI_FLOAT16_t& fp16) { } GI_FORCEINLINE -GI_FLOAT16_t GiCastFloat32ToFloat16(const GI_FLOAT32_t& low, const GI_FLOAT32_t& high) { +GI_FLOAT16_t GiCastFloat32ToFloat16(const GI_FLOAT32_t low, const GI_FLOAT32_t high) { #if defined(GI_NEON_INTRINSICS) return vcombine_f16(vcvt_f16_f32(low), vcvt_f16_f32(high)); #elif defined(GI_RVV_INTRINSICS)