提交 0945e498 编写于 作者: 卢旭辉

Merge branch 'fp16' into 'master'

Fix fp16 delegator register

See merge request applied-machine-learning/sysml/mace!1318
...@@ -20,16 +20,6 @@ namespace mace { ...@@ -20,16 +20,6 @@ namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
extern template
MaceStatus DepthwiseConv2dK3x3S1<float16_t>::DoCompute(
const DepthwiseConvComputeParam &p, const float16_t *filter_data,
const float16_t *input_data, float16_t *output_data);
extern template
MaceStatus DepthwiseConv2dK3x3S2<float16_t>::DoCompute(
const DepthwiseConvComputeParam &p, const float16_t *filter_data,
const float16_t *input_data, float16_t *output_data);
namespace { namespace {
template<typename T> template<typename T>
void DepthwiseConv2d3x3Pixel(const T *in_base, void DepthwiseConv2d3x3Pixel(const T *in_base,
...@@ -474,16 +464,6 @@ void RegisterDepthwiseConv2dK3x3Delegator(OpDelegatorRegistry *registry) { ...@@ -474,16 +464,6 @@ void RegisterDepthwiseConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
delegator::DepthwiseConv2dParam, delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU, MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
BFloat16, ImplType::NEON, K3x3S2)); BFloat16, ImplType::NEON, K3x3S2));
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S1<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S1));
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S2<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S2));
} }
} // namespace arm } // namespace arm
......
...@@ -23,24 +23,6 @@ namespace mace { ...@@ -23,24 +23,6 @@ namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
extern template void Gemm<float16_t>::Pack8x4(
const MatrixMap<const float16_t> &matrix,
MatrixMajor dst_major, float16_t *packed_matrix);
extern template void Gemm<float16_t>::Unpack8x8(
const float16_t *packed_output, MatrixMap<float16_t> *output);
extern template void Gemm<float16_t>::PackLhs(
const MatrixMap<const float16_t> &lhs, float16_t *packed_lhs);
extern template void Gemm<float16_t>::PackRhs(
const MatrixMap<const float16_t> &rhs, float16_t *packed_rhs);
extern template void Gemm<float16_t>::UnpackOutput(
const float16_t *packed_output, MatrixMap<float16_t> *output);
extern template MaceStatus Gemm<float16_t>::Compute(
const OpContext *context, const Tensor *lhs, const Tensor *rhs,
const index_t batch, const index_t rows, const index_t cols,
const index_t depth, const MatrixMajor lhs_major,
const MatrixMajor rhs_major, const MatrixMajor output_major,
const bool lhs_batched, const bool rhs_batched, Tensor *output);
template<typename T> template<typename T>
void Gemm<T>::Pack4x4(const MatrixMap<const T> &matrix, void Gemm<T>::Pack4x4(const MatrixMap<const T> &matrix,
MatrixMajor dst_major, T *packed_matrix) { MatrixMajor dst_major, T *packed_matrix) {
...@@ -719,12 +701,7 @@ void RegisterGemmDelegator(OpDelegatorRegistry *registry) { ...@@ -719,12 +701,7 @@ void RegisterGemmDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_BF16_DELEGATOR( MACE_REGISTER_BF16_DELEGATOR(
registry, Gemm<BFloat16>, delegator::GemmParam, registry, Gemm<BFloat16>, delegator::GemmParam,
MACE_DELEGATOR_KEY(Gemm, DeviceType::CPU, BFloat16, ImplType::NEON)); MACE_DELEGATOR_KEY(Gemm, DeviceType::CPU, BFloat16, ImplType::NEON));
MACE_REGISTER_FP16_DELEGATOR(
registry, Gemm<float16_t>, delegator::GemmParam,
MACE_DELEGATOR_KEY(Gemm, DeviceType::CPU, float16_t, ImplType::NEON));
} }
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
...@@ -403,6 +403,18 @@ MaceStatus DepthwiseConv2dK3x3S2<float16_t>::DoCompute( ...@@ -403,6 +403,18 @@ MaceStatus DepthwiseConv2dK3x3S2<float16_t>::DoCompute(
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
// Registers the float16_t specializations of the depthwise-conv K3x3
// delegators with `registry`.
//
// Two delegators are registered, both keyed on
// (DepthwiseConv2d, DeviceType::CPU, float16_t, ImplType::NEON) and
// distinguished by the trailing tag: K3x3S1 and K3x3S2 (presumably 3x3
// kernel with stride 1 / stride 2 -- inferred from the names, confirm
// against the class definitions).
//
// NOTE(review): the exact expansion of MACE_REGISTER_FP16_DELEGATOR is not
// visible here; it is assumed to be a no-op or guarded when FP16 support is
// not compiled in -- confirm against the macro definition.
void RegisterFP16DepthwiseConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
// K3x3S1 variant.
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S1<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S1));
// K3x3S2 variant.
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S2<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S2));
}
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
......
此差异已折叠。
...@@ -69,7 +69,11 @@ extern void RegisterGroupDeconv2dGeneralDelegator( ...@@ -69,7 +69,11 @@ extern void RegisterGroupDeconv2dGeneralDelegator(
extern void RegisterGemmDelegator(OpDelegatorRegistry *registry); extern void RegisterGemmDelegator(OpDelegatorRegistry *registry);
extern void RegisterGemvDelegator(OpDelegatorRegistry *registry); extern void RegisterGemvDelegator(OpDelegatorRegistry *registry);
// Registration entry points for the FP16 delegators. They are declared only
// when MACE_ENABLE_FP16 is defined, so any call site must be wrapped in the
// same #ifdef.
#ifdef MACE_ENABLE_FP16
extern void RegisterFP16DepthwiseConv2dK3x3Delegator(
OpDelegatorRegistry *registry);
extern void RegisterFP16GemmDelegator(OpDelegatorRegistry *registry);
#endif
#ifdef MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_QUANTIZE
namespace q8 { namespace q8 {
extern void RegisterEltwiseDelegator(OpDelegatorRegistry *registry); extern void RegisterEltwiseDelegator(OpDelegatorRegistry *registry);
...@@ -89,7 +93,6 @@ void RegisterAllOpDelegators(OpDelegatorRegistry *registry) { ...@@ -89,7 +93,6 @@ void RegisterAllOpDelegators(OpDelegatorRegistry *registry) {
ref::RegisterDepthwiseDeconv2dDelegator(registry); ref::RegisterDepthwiseDeconv2dDelegator(registry);
ref::RegisterGemmDelegator(registry); ref::RegisterGemmDelegator(registry);
ref::RegisterGemvDelegator(registry); ref::RegisterGemvDelegator(registry);
#ifdef MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_QUANTIZE
ref::q8::RegisterEltwiseDelegator(registry); ref::q8::RegisterEltwiseDelegator(registry);
ref::q8::RegisterGemvDelegator(registry); ref::q8::RegisterGemvDelegator(registry);
...@@ -123,7 +126,10 @@ void RegisterAllOpDelegators(OpDelegatorRegistry *registry) { ...@@ -123,7 +126,10 @@ void RegisterAllOpDelegators(OpDelegatorRegistry *registry) {
arm::RegisterGemmDelegator(registry); arm::RegisterGemmDelegator(registry);
arm::RegisterGemvDelegator(registry); arm::RegisterGemvDelegator(registry);
#ifdef MACE_ENABLE_FP16
arm::RegisterFP16DepthwiseConv2dK3x3Delegator(registry);
arm::RegisterFP16GemmDelegator(registry);
#endif
#ifdef MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_QUANTIZE
arm::q8::RegisterEltwiseDelegator(registry); arm::q8::RegisterEltwiseDelegator(registry);
arm::q8::RegisterGemvDelegator(registry); arm::q8::RegisterGemvDelegator(registry);
......
...@@ -115,7 +115,7 @@ $(echo "$1" | cut -d '=' -f -1)" ...@@ -115,7 +115,7 @@ $(echo "$1" | cut -d '=' -f -1)"
esac esac
done done
# Turn rpcmem off when APU is enabled, or when the target ABI is neither
# armeabi-v7a nor arm64-v8a.
# The two ABI tests must be AND-ed: `abi != A || abi != B` is true for every
# possible value of abi, which would force enable_rpcmem=false unconditionally
# and make the enable_apu test meaningless.
if [[ "${enable_apu}" == true || ( "${abi}" != armeabi-v7a && "${abi}" != arm64-v8a ) ]]; then
  enable_rpcmem=false
fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册