提交 74655928 编写于 作者: 李超

Merge branch 'tuning' into 'master'

Fix tuning and fp16 bugs

See merge request deep-computing/mace!1254
......@@ -15,6 +15,13 @@
#ifndef MACE_OPS_ARM_FP16_GEMV_H_
#define MACE_OPS_ARM_FP16_GEMV_H_
#if defined(MACE_ENABLE_NEON) && \
defined(__ARM_FP16_FORMAT_IEEE) && (__ARM_FP & 2)
// TODO(lichao): replace this header-local macro with a global macro
#define MACE_ENABLE_FP16_NEON
#endif
#include "mace/core/types.h"
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
......@@ -37,7 +44,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr,
const index_t width,
OUTPUT_TYPE *result);
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
template<>
void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
const float *v_ptr,
......@@ -112,7 +119,7 @@ void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
*out_ptr0++ = sum0;
}
}
#endif
#endif // MACE_ENABLE_FP16_NEON && __ANDROID__
} // namespace ops
} // namespace mace
......
......@@ -513,7 +513,7 @@ class MatMulOp<DeviceType::GPU, float> : public MatMulOpBase {
};
#endif // MACE_ENABLE_OPENCL
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
template <>
class MatMulOp<CPU, float16_t> : public MatMulOpBase {
public:
......@@ -590,7 +590,7 @@ class MatMulOp<CPU, float16_t> : public MatMulOpBase {
private:
};
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_FP16_NEON && __ANDROID__
void RegisterMatMul(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
......@@ -601,10 +601,10 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, float16_t);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_FP16_NEON && __ANDROID__
}
} // namespace ops
......
......@@ -271,7 +271,7 @@ class DeviceWrapper:
if device_type == common.DeviceType.GPU:
if os.path.exists(opencl_binary_file):
self.push(opencl_binary_file, self.data_dir)
self.push(opencl_binary_file, self.interior_dir)
if os.path.exists(opencl_parameter_file):
self.push(opencl_parameter_file, self.data_dir)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册