提交 0945e498 编写于 作者: 卢旭辉

Merge branch 'fp16' into 'master'

Fix fp16 delegator register

See merge request applied-machine-learning/sysml/mace!1318
......@@ -20,16 +20,6 @@ namespace mace {
namespace ops {
namespace arm {
// Explicit instantiation declarations for the float16_t DoCompute members of
// the 3x3 depthwise convolution delegators (K3x3S1 and K3x3S2). `extern
// template` suppresses implicit instantiation in this translation unit, so the
// definitions have to be supplied elsewhere.
// NOTE(review): presumably they live in the fp16-specific source file -- confirm.
extern template
MaceStatus DepthwiseConv2dK3x3S1<float16_t>::DoCompute(
const DepthwiseConvComputeParam &p, const float16_t *filter_data,
const float16_t *input_data, float16_t *output_data);
extern template
MaceStatus DepthwiseConv2dK3x3S2<float16_t>::DoCompute(
const DepthwiseConvComputeParam &p, const float16_t *filter_data,
const float16_t *input_data, float16_t *output_data);
namespace {
template<typename T>
void DepthwiseConv2d3x3Pixel(const T *in_base,
......@@ -474,16 +464,6 @@ void RegisterDepthwiseConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
BFloat16, ImplType::NEON, K3x3S2));
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S1<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S1));
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S2<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S2));
}
} // namespace arm
......
......@@ -23,24 +23,6 @@ namespace mace {
namespace ops {
namespace arm {
// Explicit instantiation declarations for the float16_t members of Gemm:
// the packing/unpacking helpers (Pack8x4, Unpack8x8, PackLhs, PackRhs,
// UnpackOutput) and Compute. `extern template` suppresses implicit
// instantiation of these members in this translation unit, so their
// definitions have to be supplied elsewhere.
// NOTE(review): presumably they live in the fp16-specific Gemm source -- confirm.
extern template void Gemm<float16_t>::Pack8x4(
const MatrixMap<const float16_t> &matrix,
MatrixMajor dst_major, float16_t *packed_matrix);
extern template void Gemm<float16_t>::Unpack8x8(
const float16_t *packed_output, MatrixMap<float16_t> *output);
extern template void Gemm<float16_t>::PackLhs(
const MatrixMap<const float16_t> &lhs, float16_t *packed_lhs);
extern template void Gemm<float16_t>::PackRhs(
const MatrixMap<const float16_t> &rhs, float16_t *packed_rhs);
extern template void Gemm<float16_t>::UnpackOutput(
const float16_t *packed_output, MatrixMap<float16_t> *output);
extern template MaceStatus Gemm<float16_t>::Compute(
const OpContext *context, const Tensor *lhs, const Tensor *rhs,
const index_t batch, const index_t rows, const index_t cols,
const index_t depth, const MatrixMajor lhs_major,
const MatrixMajor rhs_major, const MatrixMajor output_major,
const bool lhs_batched, const bool rhs_batched, Tensor *output);
template<typename T>
void Gemm<T>::Pack4x4(const MatrixMap<const T> &matrix,
MatrixMajor dst_major, T *packed_matrix) {
......@@ -719,12 +701,7 @@ void RegisterGemmDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_BF16_DELEGATOR(
registry, Gemm<BFloat16>, delegator::GemmParam,
MACE_DELEGATOR_KEY(Gemm, DeviceType::CPU, BFloat16, ImplType::NEON));
MACE_REGISTER_FP16_DELEGATOR(
registry, Gemm<float16_t>, delegator::GemmParam,
MACE_DELEGATOR_KEY(Gemm, DeviceType::CPU, float16_t, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
......@@ -403,6 +403,18 @@ MaceStatus DepthwiseConv2dK3x3S2<float16_t>::DoCompute(
return MaceStatus::MACE_SUCCESS;
}
// Registers the float16_t 3x3 depthwise convolution delegators with
// `registry`. Two entries are added, both keyed on DepthwiseConv2d /
// DeviceType::CPU / float16_t / ImplType::NEON and distinguished by the
// K3x3S1 and K3x3S2 tags.
//
// registry: delegator registry handed to MACE_REGISTER_FP16_DELEGATOR.
void RegisterFP16DepthwiseConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
// K3x3S1 variant (presumably 3x3 kernel, stride 1 -- confirm naming).
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S1<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S1));
// K3x3S2 variant (presumably 3x3 kernel, stride 2 -- confirm naming).
MACE_REGISTER_FP16_DELEGATOR(
registry, DepthwiseConv2dK3x3S2<float16_t>,
delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float16_t, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
......
此差异已折叠。
......@@ -69,7 +69,11 @@ extern void RegisterGroupDeconv2dGeneralDelegator(
extern void RegisterGemmDelegator(OpDelegatorRegistry *registry);
extern void RegisterGemvDelegator(OpDelegatorRegistry *registry);
// Forward declarations of the fp16 delegator registration functions; only
// visible when MACE_ENABLE_FP16 is defined.
#ifdef MACE_ENABLE_FP16
extern void RegisterFP16DepthwiseConv2dK3x3Delegator(
OpDelegatorRegistry *registry);
extern void RegisterFP16GemmDelegator(OpDelegatorRegistry *registry);
#endif
#ifdef MACE_ENABLE_QUANTIZE
namespace q8 {
extern void RegisterEltwiseDelegator(OpDelegatorRegistry *registry);
......@@ -89,7 +93,6 @@ void RegisterAllOpDelegators(OpDelegatorRegistry *registry) {
ref::RegisterDepthwiseDeconv2dDelegator(registry);
ref::RegisterGemmDelegator(registry);
ref::RegisterGemvDelegator(registry);
#ifdef MACE_ENABLE_QUANTIZE
ref::q8::RegisterEltwiseDelegator(registry);
ref::q8::RegisterGemvDelegator(registry);
......@@ -123,7 +126,10 @@ void RegisterAllOpDelegators(OpDelegatorRegistry *registry) {
arm::RegisterGemmDelegator(registry);
arm::RegisterGemvDelegator(registry);
#ifdef MACE_ENABLE_FP16
arm::RegisterFP16DepthwiseConv2dK3x3Delegator(registry);
arm::RegisterFP16GemmDelegator(registry);
#endif
#ifdef MACE_ENABLE_QUANTIZE
arm::q8::RegisterEltwiseDelegator(registry);
arm::q8::RegisterGemvDelegator(registry);
......
......@@ -115,7 +115,7 @@ $(echo "$1" | cut -d '=' -f -1)"
esac
done
if [[ "${enable_apu}" == true ]];then
# Disable rpcmem when APU is enabled, or when the target ABI is neither
# armeabi-v7a nor arm64-v8a. The two ABI inequality tests must be AND-ed:
# OR-ing them is true for every ABI value (no value equals both names), which
# would disable rpcmem unconditionally.
if [[ "${enable_apu}" == true || ( "${abi}" != armeabi-v7a && "${abi}" != arm64-v8a ) ]];then
  enable_rpcmem=false
fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册