From 08254c538d686b1855245a0fb1203661f904a0de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=85?=
Date: Fri, 1 Sep 2017 13:51:13 +0800
Subject: [PATCH] Add relu neon

---
 mace/kernels/BUILD             | 22 ++++++++++++++++++++
 mace/kernels/neon/relu_neon.cc | 30 +++++++++++++++++++++++++++
 mace/kernels/neon/relu_neon.h  | 19 +++++++++++++++++
 mace/kernels/relu.h            | 28 +++++++++++++++++++++++++
 mace/ops/BUILD                 |  1 +
 mace/ops/relu.cc               | 38 +++++++++++++---------------------
 6 files changed, 114 insertions(+), 24 deletions(-)
 create mode 100644 mace/kernels/BUILD
 create mode 100644 mace/kernels/neon/relu_neon.cc
 create mode 100644 mace/kernels/neon/relu_neon.h
 create mode 100644 mace/kernels/relu.h

diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD
new file mode 100644
index 00000000..a842356a
--- /dev/null
+++ b/mace/kernels/BUILD
@@ -0,0 +1,22 @@
+# Description:
+# Mace kernels: every top-level source, plus neon/ sources via if_android().
+#
+
+load("//mace:mace.bzl", "if_android")
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "kernels",
+    srcs = glob(["*.cc"]) + if_android(glob(["neon/*.cc"])),
+    hdrs = glob(["*.h"]) + if_android(glob(["neon/*.h"])),
+    deps = [
+        "//mace/core:core",
+    ],
+    copts = ['-std=c++11'],
+)
+
diff --git a/mace/kernels/neon/relu_neon.cc b/mace/kernels/neon/relu_neon.cc
new file mode 100644
index 00000000..84542217
--- /dev/null
+++ b/mace/kernels/neon/relu_neon.cc
@@ -0,0 +1,30 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include <arm_neon.h>
+#include "mace/kernels/neon/relu_neon.h"
+
+namespace mace {
+namespace kernels {
+
+// Element-wise ReLU on float data: output[i] = max(input[i], 0).
+// output_tensor->ResizeLike(input_tensor) is called before anything is stored.
+void NeonReluFuntion_float(const Tensor *input_tensor, Tensor *output_tensor) {
+  int64 size = input_tensor->size();
+  output_tensor->ResizeLike(input_tensor);
+  const float *input = input_tensor->data<float>();
+  float *output = output_tensor->mutable_data<float>();
+  const float32x4_t zero = vdupq_n_f32(0.f);
+  // size counts floats, not vectors: take 4 lanes only while 4 remain.
+  for (; size >= 4; size -= 4, input += 4, output += 4) {
+    vst1q_f32(output, vmaxq_f32(vld1q_f32(input), zero));
+  }
+  // Scalar tail for the last size % 4 elements.
+  for (; size > 0; --size, ++input, ++output) {
+    *output = *input < 0.f ? 0.f : *input;
+  }
+}
+
+} // namespace kernels
+} // namespace mace
\ No newline at end of file
diff --git a/mace/kernels/neon/relu_neon.h b/mace/kernels/neon/relu_neon.h
new file mode 100644
index 00000000..0be3be6f
--- /dev/null
+++ b/mace/kernels/neon/relu_neon.h
@@ -0,0 +1,19 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_KERNELS_RELU_NEON_H_
+#define MACE_KERNELS_RELU_NEON_H_
+
+#include "mace/core/tensor.h"
+
+namespace mace {
+namespace kernels {
+
+void NeonReluFuntion_float(const Tensor *input_tensor,
+                           Tensor *output_tensor);
+
+} // namespace kernels
+} // namespace mace
+
+#endif // MACE_KERNELS_RELU_NEON_H_
diff --git a/mace/kernels/relu.h b/mace/kernels/relu.h
new file mode 100644
index 00000000..cc613f1d
--- /dev/null
+++ b/mace/kernels/relu.h
@@ -0,0 +1,28 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_KERNELS_RELU_H_
+#define MACE_KERNELS_RELU_H_
+
+#include <algorithm>
+#include "mace/core/tensor.h"
+
+namespace mace {
+namespace kernels {
+
+// Element-wise ReLU for element type T: after ResizeLike(input_tensor),
+// output_tensor receives max(x, T(0)) for each of the input's size() elements.
+template <typename T>
+void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) {
+  const int64 size = input_tensor->size();
+  output_tensor->ResizeLike(input_tensor);
+  const T *input = input_tensor->data<T>();
+  std::transform(input, input + size, output_tensor->mutable_data<T>(),
+                 [](T x) { return std::max(x, static_cast<T>(0)); });
+}
+
+} // namespace kernels
+} // namespace mace
+
+#endif // MACE_KERNELS_RELU_H_
\ No newline at end of file
diff --git a/mace/ops/BUILD b/mace/ops/BUILD
index 7cde44fa..1acbc1fd 100644
--- a/mace/ops/BUILD
+++ b/mace/ops/BUILD
@@ -17,6 +17,7 @@ cc_library(
     deps = [
         "//mace/proto:cc_proto",
         "//mace/core:core",
+        "//mace/kernels:kernels",
     ],
     copts = ['-std=c++11'],
     alwayslink = 1,
diff --git a/mace/ops/relu.cc b/mace/ops/relu.cc
index 66662f38..59d4e3b7 100644
--- a/mace/ops/relu.cc
+++ b/mace/ops/relu.cc
@@ -4,42 +4,32 @@
 
 #include "mace/ops/relu.h"
 #include "mace/proto/mace.pb.h"
+#include "mace/kernels/relu.h"
+#if __ARM_NEON
+#include "mace/kernels/neon/relu_neon.h"
+#endif // __ARM_NEON
 
 namespace mace {
 
 template <>
 bool ReluOp::Run() {
-  const Tensor* X = Input(0);
-  Tensor* Y = Output(0);
-  Y->ResizeLike(X);
-
-  const float* Xdata = X-> data();
-  float* Ydata = Y->mutable_data();
-  for (int i = 0; i < X->size(); ++i) {
-    Ydata[i] = std::max(Xdata[i], 0.f);
-    VLOG(0) << i << ": " << Xdata[i] << " " << Ydata[i];
-  }
-
+  const Tensor* input_tensor = Input(0);
+  Tensor* output_tensor = Output(0);
+  kernels::ReluFuntion<float>(input_tensor, output_tensor);
   return true;
 }
 
+REGISTER_CPU_OPERATOR(Relu, ReluOp);
+
+#if __ARM_NEON
 template <>
 bool ReluOp::Run() {
-  const Tensor* X = Input(0);
-  Tensor* Y = Output(0);
-  Y->ResizeLike(X);
-
-  const float* Xdata = X-> data();
-  float* Ydata = Y->mutable_data();
-  for (int i = 0; i < X->size(); ++i) {
-    Ydata[i] = std::max(Xdata[i], 0.f);
-    VLOG(0) << i << ": " << Xdata[i] << " " << Ydata[i];
-  }
-
+  const Tensor* input_tensor = Input(0);
+  Tensor* output_tensor = Output(0);
+  kernels::NeonReluFuntion_float(input_tensor, output_tensor);
   return true;
 }
-
-REGISTER_CPU_OPERATOR(Relu, ReluOp);
 REGISTER_NEON_OPERATOR(Relu, ReluOp);
+#endif // __ARM_NEON
 
 } // namespace mace
-- 
GitLab