diff --git a/mace/ops/arm/fp16/gemv.h b/mace/ops/arm/fp16/gemv.h index 8e7e2a3c91438303f6724b740f16739db2ed5ebc..aa5add8cf684a13c1a036c8784eda10298b163f0 100644 --- a/mace/ops/arm/fp16/gemv.h +++ b/mace/ops/arm/fp16/gemv.h @@ -15,6 +15,13 @@ #ifndef MACE_OPS_ARM_FP16_GEMV_H_ #define MACE_OPS_ARM_FP16_GEMV_H_ +#if defined(MACE_ENABLE_NEON) && \ + defined(__ARM_FP16_FORMAT_IEEE) && (__ARM_FP & 2) +// TODO(lichao): replace it with global macro +#define MACE_ENABLE_FP16_NEON +#endif + + #include "mace/core/types.h" #if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) @@ -37,7 +44,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr, const index_t width, OUTPUT_TYPE *result); -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) +#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__) template<> void FP16Gemv(const float16_t *m_ptr, const float *v_ptr, @@ -112,7 +119,7 @@ void FP16Gemv(const float16_t *m_ptr, *out_ptr0++ = sum0; } } -#endif +#endif // MACE_ENABLE_FP16_NEON && __ANDROID__ } // namespace ops } // namespace mace diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index 394ace009931cb000ede82606b5a60b46edce35d..1c97279e90f3ccd5792c1ea866729ef0842b9bb4 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -513,7 +513,7 @@ class MatMulOp : public MatMulOpBase { }; #endif // MACE_ENABLE_OPENCL -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) +#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__) template <> class MatMulOp : public MatMulOpBase { public: @@ -590,7 +590,7 @@ class MatMulOp : public MatMulOpBase { private: }; -#endif // MACE_ENABLE_NEON +#endif // MACE_ENABLE_FP16_NEON void RegisterMatMul(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, @@ -601,10 +601,10 @@ void RegisterMatMul(OpRegistryBase *op_registry) { DeviceType::CPU, uint8_t); #endif // MACE_ENABLE_QUANTIZE -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) +#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__) MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::CPU, float16_t); -#endif // MACE_ENABLE_NEON +#endif // MACE_ENABLE_FP16_NEON && __ANDROID__ } } // namespace ops diff --git a/tools/device.py b/tools/device.py index dc51e4aec22e02e034e8849302bdeffe48a51bda..55cf3a75f77cc0343698d0cf1128cbef0044774f 100644 --- a/tools/device.py +++ b/tools/device.py @@ -271,7 +271,7 @@ class DeviceWrapper: if device_type == common.DeviceType.GPU: if os.path.exists(opencl_binary_file): - self.push(opencl_binary_file, self.data_dir) + self.push(opencl_binary_file, self.interior_dir) if os.path.exists(opencl_parameter_file): self.push(opencl_parameter_file, self.data_dir)