提交 74655928 编写于 作者: 李超

Merge branch 'tuning' into 'master'

Fix tuning and fp16 bugs

See merge request deep-computing/mace!1254
...@@ -15,6 +15,13 @@ ...@@ -15,6 +15,13 @@
#ifndef MACE_OPS_ARM_FP16_GEMV_H_ #ifndef MACE_OPS_ARM_FP16_GEMV_H_
#define MACE_OPS_ARM_FP16_GEMV_H_ #define MACE_OPS_ARM_FP16_GEMV_H_
// Define MACE_ENABLE_FP16_NEON only when NEON is enabled for this build and
// the compiler reports half-precision support through the ACLE feature-test
// macros: __ARM_FP16_FORMAT_IEEE signals that the IEEE 16-bit floating-point
// format is in use, and the mask 2 tests bit 1 of __ARM_FP, which reports
// support for the half-precision type.
#if defined(MACE_ENABLE_NEON) && \
defined(__ARM_FP16_FORMAT_IEEE) && (__ARM_FP & 2)
// TODO(lichao): replace this file-local definition with a global macro.
#define MACE_ENABLE_FP16_NEON
#endif
#include "mace/core/types.h" #include "mace/core/types.h"
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
...@@ -37,7 +44,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr, ...@@ -37,7 +44,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr,
const index_t width, const index_t width,
OUTPUT_TYPE *result); OUTPUT_TYPE *result);
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
template<> template<>
void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr, void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
const float *v_ptr, const float *v_ptr,
...@@ -112,7 +119,7 @@ void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr, ...@@ -112,7 +119,7 @@ void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
*out_ptr0++ = sum0; *out_ptr0++ = sum0;
} }
} }
#endif #endif // MACE_ENABLE_FP16_NEON && __ANDROID__
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
......
...@@ -513,7 +513,7 @@ class MatMulOp<DeviceType::GPU, float> : public MatMulOpBase { ...@@ -513,7 +513,7 @@ class MatMulOp<DeviceType::GPU, float> : public MatMulOpBase {
}; };
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
template <> template <>
class MatMulOp<CPU, float16_t> : public MatMulOpBase { class MatMulOp<CPU, float16_t> : public MatMulOpBase {
public: public:
...@@ -590,7 +590,7 @@ class MatMulOp<CPU, float16_t> : public MatMulOpBase { ...@@ -590,7 +590,7 @@ class MatMulOp<CPU, float16_t> : public MatMulOpBase {
private: private:
}; };
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_FP16_NEON
void RegisterMatMul(OpRegistryBase *op_registry) { void RegisterMatMul(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
...@@ -601,10 +601,10 @@ void RegisterMatMul(OpRegistryBase *op_registry) { ...@@ -601,10 +601,10 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE #endif // MACE_ENABLE_QUANTIZE
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, float16_t); DeviceType::CPU, float16_t);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_FP16_NEON && __ANDROID__
} }
} // namespace ops } // namespace ops
......
...@@ -271,7 +271,7 @@ class DeviceWrapper: ...@@ -271,7 +271,7 @@ class DeviceWrapper:
if device_type == common.DeviceType.GPU: if device_type == common.DeviceType.GPU:
if os.path.exists(opencl_binary_file): if os.path.exists(opencl_binary_file):
self.push(opencl_binary_file, self.data_dir) self.push(opencl_binary_file, self.interior_dir)
if os.path.exists(opencl_parameter_file): if os.path.exists(opencl_parameter_file):
self.push(opencl_parameter_file, self.data_dir) self.push(opencl_parameter_file, self.data_dir)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册