提交 04a57df8 编写于 作者: M Megvii Engine Team

fix(dnn/gi): remove lvalue reference and megdnn macro in gi_common and gi_float16 to use it in C

GitOrigin-RevId: e191a254602eb41f3577e4c80a74826b7170a9f8
上级 dc98f7ed
...@@ -60,15 +60,15 @@ ...@@ -60,15 +60,15 @@
#define GI_NEON32_INTRINSICS #define GI_NEON32_INTRINSICS
#endif #endif
#elif defined(GI_TARGET_X86) #elif defined(GI_TARGET_X86)
//#if defined(__FMA__) // #if defined(__FMA__)
//#define GI_FMA_INTRINSICS // #define GI_FMA_INTRINSICS
//#define GI_AVX2_INTRINSICS // #define GI_AVX2_INTRINSICS
//#define GI_AVX_INTRINSICS // #define GI_AVX_INTRINSICS
//#elif defined(__AVX2__) // #elif defined(__AVX2__)
//#define GI_AVX2_INTRINSICS // #define GI_AVX2_INTRINSICS
//#define GI_AVX_INTRINSICS // #define GI_AVX_INTRINSICS
//#elif defined(__AVX__) // #elif defined(__AVX__)
//#define GI_AVX_INTRINSICS // #define GI_AVX_INTRINSICS
#if defined(__SSE4_2__) #if defined(__SSE4_2__)
#define GI_SSE42_INTRINSICS #define GI_SSE42_INTRINSICS
#define GI_SSE2_INTRINSICS #define GI_SSE2_INTRINSICS
...@@ -94,7 +94,7 @@ ...@@ -94,7 +94,7 @@
//! Gi fp16 only supports arm64 neon and rvv //! Gi fp16 only supports arm64 neon and rvv
#if (defined(GI_NEON_INTRINSICS) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && \ #if (defined(GI_NEON_INTRINSICS) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && \
MEGDNN_AARCH64) || \ defined(__aarch64__)) || \
defined(GI_RVV_INTRINSICS) defined(GI_RVV_INTRINSICS)
#define GI_SUPPORT_F16 #define GI_SUPPORT_F16
#endif #endif
......
...@@ -34,7 +34,7 @@ GI_FLOAT16_t GiLoadBroadcastFloat16(const gi_float16_t* Value) { ...@@ -34,7 +34,7 @@ GI_FLOAT16_t GiLoadBroadcastFloat16(const gi_float16_t* Value) {
} }
GI_FORCEINLINE GI_FORCEINLINE
GI_FLOAT32_V2_t GiCastFloat16ToFloat32(const GI_FLOAT16_t& fp16) { GI_FLOAT32_V2_t GiCastFloat16ToFloat32(const GI_FLOAT16_t fp16) {
#if defined(GI_NEON_INTRINSICS) #if defined(GI_NEON_INTRINSICS)
GI_FLOAT32_V2_t ret; GI_FLOAT32_V2_t ret;
GiSetSubVectorFloat32V2(ret, 0, vcvt_f32_f16(vget_low_f16(fp16))); GiSetSubVectorFloat32V2(ret, 0, vcvt_f32_f16(vget_low_f16(fp16)));
...@@ -51,7 +51,7 @@ GI_FLOAT32_V2_t GiCastFloat16ToFloat32(const GI_FLOAT16_t& fp16) { ...@@ -51,7 +51,7 @@ GI_FLOAT32_V2_t GiCastFloat16ToFloat32(const GI_FLOAT16_t& fp16) {
} }
GI_FORCEINLINE GI_FORCEINLINE
GI_FLOAT16_t GiCastFloat32ToFloat16(const GI_FLOAT32_t& low, const GI_FLOAT32_t& high) { GI_FLOAT16_t GiCastFloat32ToFloat16(const GI_FLOAT32_t low, const GI_FLOAT32_t high) {
#if defined(GI_NEON_INTRINSICS) #if defined(GI_NEON_INTRINSICS)
return vcombine_f16(vcvt_f16_f32(low), vcvt_f16_f32(high)); return vcombine_f16(vcvt_f16_f32(low), vcvt_f16_f32(high));
#elif defined(GI_RVV_INTRINSICS) #elif defined(GI_RVV_INTRINSICS)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册