Merge branch 'tuning' into 'master'

Fix tuning & fp16 bug. See merge request deep-computing/mace!1254

Merge branch 'tuning' into 'master'
Fix tuning & fp16 bug. See merge request deep-computing/mace!1254
74655928 · 李超 · d9275275 · d52d3ae7 · 74655928 · 74655928
显示空白变更内容
内联并排

Showing 3 changed files with 14 additions and 7 deletions

mace/ops/arm/fp16/gemv.h mace/ops/arm/fp16/gemv.h +9 -2

mace/ops/matmul.cc mace/ops/matmul.cc +4 -4

tools/device.py tools/device.py +1 -1

未找到文件。
--- a/mace/ops/arm/fp16/gemv.h
+++ b/mace/ops/arm/fp16/gemv.h
@@ -15,6 +15,13 @@
 #ifndef MACE_OPS_ARM_FP16_GEMV_H_
 #define MACE_OPS_ARM_FP16_GEMV_H_
+#if defined(MACE_ENABLE_NEON) && \
+    defined(__ARM_FP16_FORMAT_IEEE) && (__ARM_FP & 2)
+// TODO(lichao): replace it with global macro
+#define MACE_ENABLE_FP16_NEON
+#endif
 #include "mace/core/types.h"
 #if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
@@ -37,7 +44,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr,
              const index_t width,
              OUTPUT_TYPE *result);
-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
 template<>
 void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
                                       const float *v_ptr,
@@ -112,7 +119,7 @@ void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
    *out_ptr0++ = sum0;
  }
 }
-#endif
+#endif  // MACE_ENABLE_FP16_NEON && __ANDROID__
 }  // namespace ops
 }  // namespace mace

--- a/mace/ops/matmul.cc
+++ b/mace/ops/matmul.cc
@@ -513,7 +513,7 @@ class MatMulOp<DeviceType::GPU, float> : public MatMulOpBase {
 };
 #endif  // MACE_ENABLE_OPENCL
-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
 template <>
 class MatMulOp<CPU, float16_t> : public MatMulOpBase {
 public:
@@ -590,7 +590,7 @@ class MatMulOp<CPU, float16_t> : public MatMulOpBase {
 private:
 };
-#endif  // MACE_ENABLE_NEON
+#endif  // MACE_ENABLE_FP16_NEON
 void RegisterMatMul(OpRegistryBase *op_registry) {
  MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
@@ -601,10 +601,10 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
                   DeviceType::CPU, uint8_t);
 #endif  // MACE_ENABLE_QUANTIZE
-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
  MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
                   DeviceType::CPU, float16_t);
-#endif  // MACE_ENABLE_NEON
+#endif  // MACE_ENABLE_FP16_NEON && __ANDROID__
 }
 }  // namespace ops

--- a/tools/device.py
+++ b/tools/device.py
@@ -271,7 +271,7 @@ class DeviceWrapper:
            if device_type == common.DeviceType.GPU:
                if os.path.exists(opencl_binary_file):
-                    self.push(opencl_binary_file, self.data_dir)
+                    self.push(opencl_binary_file, self.interior_dir)
                if os.path.exists(opencl_parameter_file):
                    self.push(opencl_parameter_file, self.data_dir)