Commit 00febe43 authored by liukai6

fix fp16 bug

Parent 0c5aab67
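Note on the fix: the old guards required both `__ANDROID__` and `ANDROID`. Compilers targeting Android predefine `__ANDROID__`, while plain `ANDROID` is only a convention that some build systems inject via `-DANDROID`; the symptom here suggests MACE's Bazel build did not, so every fp16 code path behind the double guard was silently compiled out. The commit drops the redundant `defined(ANDROID)` everywhere and, for armv7, switches `-mfpu=neon` to `-mfpu=neon-fp16` so `float16_t` is actually usable. A minimal sketch of the corrected guard pattern (hypothetical function name, not MACE's verbatim source):

```cpp
#include <cstddef>
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#include <arm_neon.h>  // declares float16_t when the target FPU supports fp16
#endif

enum DataType { DT_FLOAT, DT_FLOAT16 };

// The fp16 case must sit under exactly the same guard that makes
// float16_t visible; a mismatched guard either breaks the build or,
// as in the bug fixed here, silently drops the case.
std::size_t GetEnumTypeSizeSketch(DataType dt) {
  switch (dt) {
    case DT_FLOAT:
      return sizeof(float);
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
    case DT_FLOAT16:
      return sizeof(float16_t);  // 2 bytes
#endif
    default:
      return 0;  // type not supported in this build
  }
}
```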
@@ -46,7 +46,7 @@ namespace mace {
     break; \
   }

-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
 #define MACE_TYPE_ENUM_SWITCH_CASE_NEON(STATEMENTS) \
   MACE_CASE(float16_t, MACE_SINGLE_ARG(STATEMENTS))
 #else
@@ -47,7 +47,7 @@ size_t GetEnumTypeSize(const DataType dt) {
       return sizeof(float);
     case DT_HALF:
       return sizeof(half);
-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
     case DT_FLOAT16:
       return sizeof(float16_t);
 #endif
@@ -17,7 +17,7 @@
 #include <cstdint>
 #include <string>

-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
 #include <arm_neon.h>
 #endif
@@ -54,7 +54,7 @@ struct EnumToDataType;
 };
 MACE_MAPPING_DATA_TYPE_AND_ENUM(half, DT_HALF);
-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
 MACE_MAPPING_DATA_TYPE_AND_ENUM(float16_t, DT_FLOAT16);
 #endif
 MACE_MAPPING_DATA_TYPE_AND_ENUM(float, DT_FLOAT);
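For context, `MACE_MAPPING_DATA_TYPE_AND_ENUM` pairs each C++ type with its `DataType` enum value in both directions via the `DataTypeToEnum`/`EnumToDataType` templates named in this hunk. The `float16_t` entry can only exist where `<arm_neon.h>` declares the type, hence the guard. An illustrative reconstruction of such a trait mapping (a sketch under that assumption, not MACE's verbatim macro):

```cpp
enum DataType { DT_FLOAT, DT_HALF, DT_FLOAT16 };

template <typename T>
struct DataTypeToEnum;   // maps C++ type -> DataType enum value

template <DataType DT>
struct EnumToDataType;   // maps DataType enum value -> C++ type

// One pair of specializations per supported type.
#define MACE_MAPPING_SKETCH(DATA_TYPE, ENUM_VALUE)       \
  template <>                                            \
  struct DataTypeToEnum<DATA_TYPE> {                     \
    static constexpr DataType value = ENUM_VALUE;        \
  };                                                     \
  template <>                                            \
  struct EnumToDataType<ENUM_VALUE> {                    \
    typedef DATA_TYPE Type;                              \
  };

MACE_MAPPING_SKETCH(float, DT_FLOAT)
static_assert(DataTypeToEnum<float>::value == DT_FLOAT, "mapping holds");
```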
@@ -38,7 +38,7 @@ cc_library(
     ]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
-        "-mfpu=neon",
+        "-mfpu=neon-fp16",
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
@@ -61,7 +61,20 @@ cc_library(
         "-Werror",
         "-Wextra",
         "-Wno-missing-field-initializers",
-    ],
+    ] + if_openmp_enabled([
+        "-fopenmp",
+    ]) + if_neon_enabled([
+        "-DMACE_ENABLE_NEON",
+    ]) + if_android_armv7([
+        "-mfpu=neon-fp16",
+        "-mfloat-abi=softfp",
+    ]) + if_opencl_enabled([
+        "-DMACE_ENABLE_OPENCL",
+    ]) + if_quantize_enabled([
+        "-DMACE_ENABLE_QUANTIZE",
+    ]) + if_hexagon_enabled([
+        "-DMACE_ENABLE_HEXAGON",
+    ]),
     deps = [
         "//mace/core",
         "@gtest",
@@ -89,7 +102,7 @@ cc_library(
     ]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
-        "-mfpu=neon",
+        "-mfpu=neon-fp16",
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
@@ -141,7 +154,7 @@ cc_library(
     ]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
-        "-mfpu=neon",
+        "-mfpu=neon-fp16",
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
@@ -186,7 +199,7 @@ cc_library(
     ]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
-        "-mfpu=neon",
+        "-mfpu=neon-fp16",
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
@@ -221,7 +234,7 @@ cc_library(
     ]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
-        "-mfpu=neon",
+        "-mfpu=neon-fp16",
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
@@ -256,7 +269,7 @@ cc_library(
     ]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
-        "-mfpu=neon",
+        "-mfpu=neon-fp16",
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
@@ -325,7 +338,6 @@ cc_library(
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
         "-mfpu=neon-fp16",
-    ]) + if_android_armv7([
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
@@ -370,7 +382,6 @@ cc_library(
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
         "-mfpu=neon-fp16",
-    ]) + if_android_armv7([
         "-mfloat-abi=softfp",
     ]) + if_opencl_enabled([
         "-DMACE_ENABLE_OPENCL",
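Background on the repeated copts change above (hedged): on armv7, `-mfpu=neon` does not advertise half-precision support, while `-mfpu=neon-fp16` adds the fp16 conversion instructions; without them `<arm_neon.h>` exposes neither `float16_t` nor the fp16 intrinsics. The `@@ -61,7 +61,20 @@` hunk also propagates these same flags (including `-DMACE_ENABLE_NEON`) to the test target, so code guarded by those macros actually compiles into the tests. A small illustration of code that requires the new flag on armv7 (illustrative, not a MACE kernel; loop-remainder handling omitted):

```cpp
// Compiles on armv7 only with fp16 in the FPU spec, e.g. -mfpu=neon-fp16.
#include <arm_neon.h>

float SumFp16(const float16_t *x, int n) {
  float32x4_t acc = vdupq_n_f32(0.0f);
  for (int i = 0; i + 4 <= n; i += 4) {
    // Load four fp16 lanes and widen to fp32 before accumulating.
    acc = vaddq_f32(acc, vcvt_f32_f16(vld1_f16(x + i)));
  }
  return vgetq_lane_f32(acc, 0) + vgetq_lane_f32(acc, 1) +
         vgetq_lane_f32(acc, 2) + vgetq_lane_f32(acc, 3);
}
```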
@@ -17,15 +17,13 @@
 #include "mace/core/types.h"

-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
 #include <arm_neon.h>
 #endif

-#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
-#if defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__) && defined(__ANDROID__)
 #define vaddvq_f32(v) ((v)[0] + (v)[1] + (v)[2] + (v)[3])
 #endif
-#endif

 namespace mace {
 namespace ops {
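`vaddvq_f32` (add across all four lanes of a vector) is a native intrinsic only on AArch64; on 32-bit ARM the header polyfills it, and the fix collapses the previously nested guards into a single condition. A hedged equivalent of the polyfill using portable NEON intrinsics instead of the lane-subscript extension:

```cpp
#include <arm_neon.h>

// Horizontal add of a float32x4_t. On AArch64 the native intrinsic exists;
// on armv7 we reduce pairwise, computing the same sum as the macro polyfill.
static inline float HorizontalAddF32(float32x4_t v) {
#if defined(__aarch64__)
  return vaddvq_f32(v);
#else
  float32x2_t sum2 = vadd_f32(vget_low_f32(v), vget_high_f32(v));
  return vget_lane_f32(vpadd_f32(sum2, sum2), 0);
#endif
}
```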
@@ -39,7 +37,7 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr,
               const index_t width,
               OUTPUT_TYPE *result);

-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
 template<>
 void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
                                        const float *v_ptr,
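The specialization declared here multiplies an fp16 weight matrix by an fp32 vector and writes fp32 results. A scalar reference of that contract, hedged: the full parameter list is abbreviated in this hunk, and `index_t` is assumed to be MACE's 64-bit index type; the real kernel is NEON-vectorized.

```cpp
#include <cstdint>

using index_t = int64_t;  // assumption: matches mace/core's index type

// Scalar reference for FP16Gemv<float16_t, float, float>: result = M * v,
// widening each fp16 matrix entry to fp32 before accumulation.
template <typename LhsT, typename RhsT, typename OutT>
void FP16GemvRef(const LhsT *m_ptr, const RhsT *v_ptr,
                 const index_t height, const index_t width, OutT *result) {
  for (index_t h = 0; h < height; ++h) {
    float acc = 0.0f;
    for (index_t w = 0; w < width; ++w) {
      acc += static_cast<float>(m_ptr[h * width + w]) *
             static_cast<float>(v_ptr[w]);
    }
    result[h] = static_cast<OutT>(acc);
  }
}
```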
@@ -513,7 +513,7 @@ class MatMulOp<DeviceType::GPU, T> : public MatMulOpBase {
 };
 #endif  // MACE_ENABLE_OPENCL

-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
 template <>
 class MatMulOp<CPU, float16_t> : public MatMulOpBase {
  public:
@@ -610,7 +610,7 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
                    DeviceType::GPU, half);
 #endif  // MACE_ENABLE_OPENCL

-#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
   MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
                    DeviceType::CPU, float16_t);
 #endif  // MACE_ENABLE_NEON
@@ -263,13 +263,17 @@ void MatMulBenchmark(
   OpsTestNet net;

   // Add input data
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+  if (DataTypeToEnum<T>::value == DT_FLOAT16) {
+    net.AddRandomInput<D, float16_t>("A", {batch, height, channels});
+    net.AddRandomInput<D, float>("B", {batch, channels, out_width});
+  } else {
+#endif
   net.AddRandomInput<D, T>("A", {batch, height, channels});
   net.AddRandomInput<D, T>("B", {batch, channels, out_width});
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+  }
+#endif
   net.GetTensor("A")->SetIsWeight(true);
   net.GetTensor("B")->SetIsWeight(true);
   if (DataTypeToEnum<T>::value == DT_UINT8) {
@@ -310,13 +314,17 @@ void MatMulTransposeBenchmark(
   OpsTestNet net;

   // Add input data
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+  if (DataTypeToEnum<T>::value == DT_FLOAT16) {
+    net.AddRandomInput<D, float>("A", {batch, height, channels});
+    net.AddRandomInput<D, float16_t>("B", {batch, out_width, channels});
+  } else {
+#endif
   net.AddRandomInput<D, T>("A", {batch, height, channels});
net.AddRandomInput<D, float>("B", {batch, out_width, channels});
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+  }
+#endif
   net.GetTensor("A")->SetIsWeight(true);
   net.GetTensor("B")->SetIsWeight(true);
   if (DataTypeToEnum<T>::value == DT_UINT8) {
@@ -388,11 +396,16 @@ void MatMulTransposeBenchmark(
   }                                                                        \
   MACE_BENCHMARK(MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)

 #ifdef MACE_ENABLE_QUANTIZE
+#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
+#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W)                     \
+  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU);       \
+  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float16_t, CPU);   \
+  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU);
+#else
 #define MACE_BM_MATMUL_TRANPOSE(N, H, C, W)                     \
   MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU);       \
   MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU);
+#endif

 MACE_BM_MATMUL_OP(1, 30000, 256, 1);
 MACE_BM_MATMUL_OP(1, 128, 256, 128);
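With the quantize-enabled branch shown above, each invocation now also instantiates an fp16 transpose benchmark on NEON-capable Android builds. Illustrative expansion, as it would appear in this benchmark file (the invocation is hypothetical; note the macro name keeps the file's existing `TRANPOSE` spelling):

```cpp
MACE_BM_MATMUL_TRANPOSE(1, 128, 256, 128);
// expands (fp16-enabled branch) to:
//   MACE_BM_MATMUL_TRANSPOSE_MACRO(1, 128, 256, 128, float, CPU);
//   MACE_BM_MATMUL_TRANSPOSE_MACRO(1, 128, 256, 128, float16_t, CPU);  // fp16 builds only
//   MACE_BM_MATMUL_TRANSPOSE_MACRO(1, 128, 256, 128, uint8_t, CPU);
```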