From d52d3ae7783b8eb6045c51dbf9bb81ab312bb03f Mon Sep 17 00:00:00 2001 From: luxuhui Date: Fri, 20 Mar 2020 15:22:18 +0800 Subject: [PATCH] fix tuning&fp16 bug N/A Signed-off-by: Luxuhui --- mace/ops/arm/fp16/gemv.h | 11 +++++++++-- mace/ops/matmul.cc | 8 ++++---- tools/device.py | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/mace/ops/arm/fp16/gemv.h b/mace/ops/arm/fp16/gemv.h index 8e7e2a3c..aa5add8c 100644 --- a/mace/ops/arm/fp16/gemv.h +++ b/mace/ops/arm/fp16/gemv.h @@ -15,6 +15,13 @@ #ifndef MACE_OPS_ARM_FP16_GEMV_H_ #define MACE_OPS_ARM_FP16_GEMV_H_ +#if defined(MACE_ENABLE_NEON) && \ + defined(__ARM_FP16_FORMAT_IEEE) && (__ARM_FP & 2) +// TODO(lichao): replace it with global macro +#define MACE_ENABLE_FP16_NEON +#endif + + #include "mace/core/types.h" #if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) @@ -37,7 +44,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr, const index_t width, OUTPUT_TYPE *result); -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) +#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__) template<> void FP16Gemv(const float16_t *m_ptr, const float *v_ptr, @@ -112,7 +119,7 @@ void FP16Gemv(const float16_t *m_ptr, *out_ptr0++ = sum0; } } -#endif +#endif // MACE_ENABLE_FP16_NEON && __ANDROID__ } // namespace ops } // namespace mace diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index 394ace00..1c97279e 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -513,7 +513,7 @@ class MatMulOp : public MatMulOpBase { }; #endif // MACE_ENABLE_OPENCL -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) +#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__) template <> class MatMulOp : public MatMulOpBase { public: @@ -590,7 +590,7 @@ class MatMulOp : public MatMulOpBase { private: }; -#endif // MACE_ENABLE_NEON +#endif // MACE_ENABLE_FP16_NEON void RegisterMatMul(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, @@ -601,10 +601,10 @@ void RegisterMatMul(OpRegistryBase *op_registry) { DeviceType::CPU, uint8_t); #endif // MACE_ENABLE_QUANTIZE -#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) +#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__) MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::CPU, float16_t); -#endif // MACE_ENABLE_NEON +#endif // MACE_ENABLE_FP16_NEON && __ANDROID__ } } // namespace ops diff --git a/tools/device.py b/tools/device.py index dc51e4ae..55cf3a75 100644 --- a/tools/device.py +++ b/tools/device.py @@ -271,7 +271,7 @@ class DeviceWrapper: if device_type == common.DeviceType.GPU: if os.path.exists(opencl_binary_file): - self.push(opencl_binary_file, self.data_dir) + self.push(opencl_binary_file, self.interior_dir) if os.path.exists(opencl_parameter_file): self.push(opencl_parameter_file, self.data_dir) -- GitLab