diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 3f0585b86a4798386383f67378e91982d6fe6984..f4bd2a3748a0ba30bdd34756e547932e0e4bd3d5 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -46,7 +46,7 @@ namespace mace { break; \ } -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #define MACE_TYPE_ENUM_SWITCH_CASE_NEON(STATEMENTS) \ MACE_CASE(float16_t, MACE_SINGLE_ARG(STATEMENTS)) #else diff --git a/mace/core/types.cc b/mace/core/types.cc index 2b5393a631d97d31e5a30e27fe34b2615e56f89e..3e4225391d32c65c94e73673510de03a6b0750c7 100644 --- a/mace/core/types.cc +++ b/mace/core/types.cc @@ -47,7 +47,7 @@ size_t GetEnumTypeSize(const DataType dt) { return sizeof(float); case DT_HALF: return sizeof(half); -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) case DT_FLOAT16: return sizeof(float16_t); #endif diff --git a/mace/core/types.h b/mace/core/types.h index f965f286e7ec2ca22f09b5be85023559203b774f..f2fbad3099adb500ab763de6b93dfed59d33bc76 100644 --- a/mace/core/types.h +++ b/mace/core/types.h @@ -17,7 +17,7 @@ #include #include -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #include #endif @@ -54,7 +54,7 @@ struct EnumToDataType; }; MACE_MAPPING_DATA_TYPE_AND_ENUM(half, DT_HALF); -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) MACE_MAPPING_DATA_TYPE_AND_ENUM(float16_t, DT_FLOAT16); #endif MACE_MAPPING_DATA_TYPE_AND_ENUM(float, DT_FLOAT); diff --git a/mace/ops/BUILD.bazel b/mace/ops/BUILD.bazel index 37fec862dd71bd56b4cde0d832112ff738b21244..7217567d9147eaf938c9e424f3ea77cda2b4417b 100644 --- a/mace/ops/BUILD.bazel +++ b/mace/ops/BUILD.bazel @@ -38,7 +38,7 @@ cc_library( ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ - "-mfpu=neon", + "-mfpu=neon-fp16", "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -61,7 +61,20 @@ cc_library( "-Werror", "-Wextra", "-Wno-missing-field-initializers", - ], + ] + if_openmp_enabled([ + "-fopenmp", + ]) + if_neon_enabled([ + "-DMACE_ENABLE_NEON", + ]) + if_android_armv7([ + "-mfpu=neon-fp16", + "-mfloat-abi=softfp", + ]) + if_opencl_enabled([ + "-DMACE_ENABLE_OPENCL", + ]) + if_quantize_enabled([ + "-DMACE_ENABLE_QUANTIZE", + ]) + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), deps = [ "//mace/core", "@gtest", @@ -89,7 +102,7 @@ cc_library( ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ - "-mfpu=neon", + "-mfpu=neon-fp16", "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -141,7 +154,7 @@ cc_library( ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ - "-mfpu=neon", + "-mfpu=neon-fp16", "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -186,7 +199,7 @@ cc_library( ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ - "-mfpu=neon", + "-mfpu=neon-fp16", "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -221,7 +234,7 @@ cc_library( ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ - "-mfpu=neon", + "-mfpu=neon-fp16", "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -256,7 +269,7 @@ cc_library( ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ - "-mfpu=neon", + "-mfpu=neon-fp16", "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -325,7 +338,6 @@ cc_library( "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ "-mfpu=neon-fp16", - ]) + if_android_armv7([ "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -370,7 +382,6 @@ cc_library( "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ "-mfpu=neon-fp16", - ]) + if_android_armv7([ "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", diff --git a/mace/ops/arm/fp16_gemm.h b/mace/ops/arm/fp16_gemm.h index 860e259f144fa299efd6e44b4ea1377f27e38b82..878080684f288692250fff0d8f8bc64f7cea7f1d 100644 --- a/mace/ops/arm/fp16_gemm.h +++ b/mace/ops/arm/fp16_gemm.h @@ -17,15 +17,13 @@ #include "mace/core/types.h" -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #include #endif -#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__) -#if defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__) && defined(__ANDROID__) #define vaddvq_f32(v) ((v)[0] + (v)[1] + (v)[2] + (v)[3]) #endif -#endif namespace mace { namespace ops { @@ -39,7 +37,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr, const index_t width, OUTPUT_TYPE *result); -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) template<> void FP16Gemv(const float16_t *m_ptr, const float *v_ptr, diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index 98d8c30c60dc180067df6f1d58057005f5fa266e..3799eafd05274196665e34f165809cab6c5c72d8 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -513,7 +513,7 @@ class MatMulOp : public MatMulOpBase { }; #endif // MACE_ENABLE_OPENCL -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) template <> class MatMulOp : public MatMulOpBase { public: @@ -610,7 +610,7 @@ void RegisterMatMul(OpRegistryBase *op_registry) { DeviceType::GPU, half); #endif // MACE_ENABLE_OPENCL -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID) +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::CPU, float16_t); #endif // MACE_ENABLE_NEON diff --git a/mace/ops/matmul_benchmark.cc b/mace/ops/matmul_benchmark.cc index e87dc91b6ef5eea92a172371814e3e5056cd314b..087c824e389434f05fb0577146dabfa5df82fe55 100644 --- a/mace/ops/matmul_benchmark.cc +++ b/mace/ops/matmul_benchmark.cc @@ -263,13 +263,17 @@ void MatMulBenchmark( OpsTestNet net; // Add input data +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) if (DataTypeToEnum::value == DT_FLOAT16) { net.AddRandomInput("A", {batch, height, channels}); net.AddRandomInput("B", {batch, channels, out_width}); } else { +#endif net.AddRandomInput("A", {batch, height, channels}); net.AddRandomInput("B", {batch, channels, out_width}); +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) } +#endif net.GetTensor("A")->SetIsWeight(true); net.GetTensor("B")->SetIsWeight(true); if (DataTypeToEnum::value == DT_UINT8) { @@ -310,13 +314,17 @@ void MatMulTransposeBenchmark( OpsTestNet net; // Add input data +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) if (DataTypeToEnum::value == DT_FLOAT16) { net.AddRandomInput("A", {batch, height, channels}); net.AddRandomInput("B", {batch, out_width, channels}); } else { +#endif net.AddRandomInput("A", {batch, height, channels}); net.AddRandomInput("B", {batch, out_width, channels}); +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) } +#endif net.GetTensor("A")->SetIsWeight(true); net.GetTensor("B")->SetIsWeight(true); if (DataTypeToEnum::value == DT_UINT8) { @@ -388,11 +396,16 @@ void MatMulTransposeBenchmark( } \ MACE_BENCHMARK(MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE) -#ifdef MACE_ENABLE_QUANTIZE +#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \ MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \ MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float16_t, CPU); \ MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU); +#else +#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \ + MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \ + MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU); +#endif MACE_BM_MATMUL_OP(1, 30000, 256, 1); MACE_BM_MATMUL_OP(1, 128, 256, 128);